{"id":"https://openalex.org/W2519996712","doi":"https://doi.org/10.1109/hpcsim.2016.7568422","title":"Characterizing numascale clusters with GPUs: MPI-based and GPU interconnect benchmarks","display_name":"Characterizing numascale clusters with GPUs: MPI-based and GPU interconnect benchmarks","publication_year":2016,"publication_date":"2016-07-01","ids":{"openalex":"https://openalex.org/W2519996712","doi":"https://doi.org/10.1109/hpcsim.2016.7568422","mag":"2519996712"},"language":"en","primary_location":{"id":"doi:10.1109/hpcsim.2016.7568422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033352276","display_name":"Malik M. Khan","orcid":"https://orcid.org/0000-0002-5663-9335"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Malik M. Khan","raw_affiliation_strings":["Dept. of Computer and Info. Science (IDI), NTNU, Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer and Info. Science (IDI), NTNU, Trondheim, Norway","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045535996","display_name":"Anne C. Elster","orcid":"https://orcid.org/0000-0002-7087-6193"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anne C. Elster","raw_affiliation_strings":["Dept. of Computer and Info. Science (IDI), NTNU, Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer and Info. Science (IDI), NTNU, Trondheim, Norway","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033352276"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3153,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.57889787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"840","last_page":"847"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8780678510665894},{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.7047275900840759},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6157320141792297},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5606604814529419},{"id":"https://openalex.org/keywords/spmd","display_name":"SPMD","score":0.5146406888961792},{"id":"https://openalex.org/keywords/myrinet","display_name":"Myrinet","score":0.46753188967704773},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.46607136726379395},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.45457571744918823},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4465583562850952}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8780678510665894},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.7047275900840759},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6157320141792297},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5606604814529419},{"id":"https://openalex.org/C7042729","wikidata":"https://www.wikidata.org/wiki/Q2289219","display_name":"SPMD","level":2,"score":0.5146406888961792},{"id":"https://openalex.org/C2780601250","wikidata":"https://www.wikidata.org/wiki/Q1863181","display_name":"Myrinet","level":3,"score":0.46753188967704773},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.46607136726379395},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.45457571744918823},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4465583562850952},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcsim.2016.7568422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W138041807","https://openalex.org/W1598379566","https://openalex.org/W1874887450","https://openalex.org/W1983545421","https://openalex.org/W1994209517","https://openalex.org/W2012313745","https://openalex.org/W2036853599","https://openalex.org/W2057897376","https://openalex.org/W2080592089","https://openalex.org/W2098815550","https://openalex.org/W2117285153","https://openalex.org/W2144740242","https://openalex.org/W2169875292","https://openalex.org/W2259305999","https://openalex.org/W2271539387","https://openalex.org/W2407319002","https://openalex.org/W2554811052","https://openalex.org/W2983421681","https://openalex.org/W4239023471","https://openalex.org/W6639346831","https://openalex.org/W6714041424","https://openalex.org/W6903632854"],"related_works":["https://openalex.org/W2104094072","https://openalex.org/W4246679332","https://openalex.org/W1978254186","https://openalex.org/W2141244992","https://openalex.org/W4237336225","https://openalex.org/W2538337951","https://openalex.org/W2099148634","https://openalex.org/W1486544172","https://openalex.org/W2007129194","https://openalex.org/W1531092195"],"abstract_inverted_index":{"Modern":[0],"HPC":[1],"clusters":[2],"are":[3],"increasingly":[4],"heterogeneous":[5],"both":[6],"in":[7,86],"processor":[8],"types,":[9],"topologies":[10],"of":[11],"computing,":[12],"communication":[13,205],"and":[14,74,147,196,206],"storage":[15],"resources.":[16],"In":[17,191],"this":[18],"paper,":[19],"we":[20,134,199],"describe":[21],"how":[22],"to":[23,26,91,180],"use":[24,135,181,187],"benchmarking,":[25],"characterize":[27,130],"the":[28,57,99,119,155,160,188],"high-speed":[29],"interconnect,":[30],"NumaConnect,":[31,54],"associated":[32],"with":[33,39],"a":[34,42,50,76,88],"shared-memory":[35,78],"Numascale":[36,47,132],"cluster":[37],"system":[38,64,84],"GPUs,":[40],"constituting":[41],"novel":[43],"testbed":[44],"at":[45],"NTNU.":[46],"systems":[48],"include":[49],"unique":[51],"node":[52],"controller,":[53],"based":[55],"on":[56,63,193,204,209],"FPGA":[58],"or":[59,127,183],"ASIC-based":[60],"NumaChip,":[61],"depending":[62],"vendor":[65],"requirements.":[66],"The":[67,109],"system's":[68],"interconnects":[69],"uses":[70],"AMD's":[71],"HyperTransport":[72],"protocol,":[73],"provide":[75],"cache-coherent":[77],"single":[79],"image":[80],"operating":[81],"system.":[82],"Our":[83,95,171],"has,":[85],"addition,":[87],"GPU":[89],"added":[90],"each":[92],"server":[93],"blade.":[94],"characterizations":[96],"efforts":[97],"target":[98],"NumaConnect":[100,120],"which":[101],"includes":[102,144],"an":[103],"RDMA-type":[104],"Block":[105],"Transfer":[106,115],"Engine":[107],"(BTE).":[108],"BTE":[110],"is":[111,176],"used":[112],"by":[113],"Byte":[114],"Libraries":[116],"such":[117],"as":[118,163,165],"BTL":[121],"(NC-BTL)":[122],"for":[123,169],"message":[124],"passing":[125],"(MPI)":[126],"BLACS.":[128],"To":[129],"our":[131,140],"system,":[133],"several":[136],"benchmark":[137,153],"suites":[138],"including:":[139],"own":[141],"SimpleBench":[142],"that":[143,174,186],"ping-pong,":[145],"MPI-Reduce":[146],"MPI-Barrier":[148],"tests;":[149],"two":[150],"well-known":[151],"MPI":[152,182],"suites:":[154],"NAS":[156],"Parallel":[157],"Benchmarks":[158],"(NPB)-MPI,":[159],"OSU":[161,195],"microbenchmarks;":[162],"well":[164],"Nvidia's":[166],"Bandwidth":[167],"test":[168],"GPUs.":[170],"results":[172],"show":[173],"it":[175],"generally":[177],"very":[178],"beneficial":[179],"other":[184],"libraries":[185],"NC-BTL":[189],"library.":[190],"fact,":[192],"selected":[194],"NPB":[197],"benchmarks,":[198],"achieve":[200],"order-of-magnitude":[201],"performance":[202],"improvements":[203],"synchronization":[207],"costs":[208],"these":[210],"benchmarks":[211],"when":[212],"using":[213],"NC-BTL.":[214]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
