{"id":"https://openalex.org/W2915358162","doi":"https://doi.org/10.1109/cahpc.2018.8645874","title":"Towards a Single-Host Many-GPU System","display_name":"Towards a Single-Host Many-GPU System","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W2915358162","doi":"https://doi.org/10.1109/cahpc.2018.8645874","mag":"2915358162"},"language":"en","primary_location":{"id":"doi:10.1109/cahpc.2018.8645874","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645874","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067683700","display_name":"Ming\u2010Hung Chen","orcid":"https://orcid.org/0000-0003-2388-5086"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ming-Hung Chen","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108479300","display_name":"I\u2010Hsin Chung","orcid":"https://orcid.org/0000-0003-4555-9257"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"I.-Hsin Chung","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062158919","display_name":"B\u00fclent Abali","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bulent Abali","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009418528","display_name":"P. Crumley","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paul Crumley","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5067683700"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":0.263,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56679456,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"140","last_page":"147"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pci-express","display_name":"PCI Express","score":0.9576030969619751},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.9424675107002258},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8518045544624329},{"id":"https://openalex.org/keywords/host","display_name":"Host (biology)","score":0.7975058555603027},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7869554758071899},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6122704744338989},{"id":"https://openalex.org/keywords/interconnection","display_name":"Interconnection","score":0.6106439232826233},{"id":"https://openalex.org/keywords/payload","display_name":"Payload (computing)","score":0.5952740907669067},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.47630369663238525},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4619278311729431},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.42578500509262085},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.41094741225242615},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3903399109840393},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3461647629737854},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3252764642238617},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.19423896074295044},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1699504554271698}],"concepts":[{"id":"https://openalex.org/C64270927","wikidata":"https://www.wikidata.org/wiki/Q206924","display_name":"PCI Express","level":3,"score":0.9576030969619751},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.9424675107002258},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518045544624329},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.7975058555603027},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7869554758071899},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6122704744338989},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.6106439232826233},{"id":"https://openalex.org/C134066672","wikidata":"https://www.wikidata.org/wiki/Q1424639","display_name":"Payload (computing)","level":3,"score":0.5952740907669067},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.47630369663238525},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4619278311729431},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.42578500509262085},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41094741225242615},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3903399109840393},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3461647629737854},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3252764642238617},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.19423896074295044},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1699504554271698},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cahpc.2018.8645874","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645874","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W164384110","https://openalex.org/W1980759071","https://openalex.org/W2051267043","https://openalex.org/W2090943890","https://openalex.org/W2106329447","https://openalex.org/W2117539524","https://openalex.org/W2122002474","https://openalex.org/W2149234156","https://openalex.org/W2155893237","https://openalex.org/W2194775991","https://openalex.org/W2521708680","https://openalex.org/W2561119724","https://openalex.org/W2562243536","https://openalex.org/W2606722458","https://openalex.org/W2751543632","https://openalex.org/W2753239506"],"related_works":["https://openalex.org/W4385894176","https://openalex.org/W2595172197","https://openalex.org/W2347371119","https://openalex.org/W2084856301","https://openalex.org/W2612768808","https://openalex.org/W3131402800","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W2388965158","https://openalex.org/W2915358162"],"abstract_inverted_index":{"As":[0],"computation-intensive":[1],"tasks":[2],"such":[3],"as":[4,36,90],"deep":[5],"learning":[6],"and":[7,57,136],"big":[8],"data":[9,55,71],"analysis":[10],"take":[11],"advantage":[12],"of":[13,33,39,101,133],"GPU":[14],"based":[15],"accelerators,":[16],"the":[17,29,37,48,54,61,64,70,77,80,94,99,114,121,134],"interconnection":[18,81],"links":[19],"may":[20],"become":[21],"a":[22],"bottleneck.":[23],"In":[24],"this":[25],"paper,":[26],"we":[27],"investigate":[28],"upcoming":[30],"performance":[31,85],"bottleneck":[32,78],"multi-accelerator":[34],"systems,":[35],"number":[38],"accelerators":[40],"equipped":[41],"with":[42,60,105],"single":[43],"host":[44,49,102],"grows.":[45],"We":[46,97,111],"instrumented":[47],"PCIe":[50,138],"fabric":[51],"to":[52,75,93,107,119],"measure":[53],"transfer":[56,72],"compared":[58],"it":[59],"measurements":[62],"from":[63],"software":[65],"tool.":[66],"It":[67],"shows":[68,125],"how":[69],"(P2P)":[73],"helps":[74],"avoid":[76],"on":[79,117],"links,":[82],"but":[83],"multi-GPU":[84],"does":[86],"not":[87],"scale":[88],"up":[89],"expected":[91],"due":[92],"control":[95,103],"messages.":[96],"quantify":[98],"impact":[100],"messages":[104],"suggestions":[106],"remedy":[108],"scalability":[109],"bottlenecks.":[110],"also":[112],"implement":[113],"proposed":[115],"strategy":[116,127],"Lulesh":[118],"validate":[120],"concept.":[122],"The":[123],"result":[124],"our":[126],"can":[128],"save":[129],"59.86%":[130],"time":[131],"cost":[132],"kernel":[135],"13.32%":[137],"H2D":[139],"payload.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
