{"id":"https://openalex.org/W4400798871","doi":"https://doi.org/10.1145/3626203.3670549","title":"Design and Implementation of an IPC-based Collective MPI Library for Intel GPUs","display_name":"Design and Implementation of an IPC-based Collective MPI Library for Intel GPUs","publication_year":2024,"publication_date":"2024-07-17","ids":{"openalex":"https://openalex.org/W4400798871","doi":"https://doi.org/10.1145/3626203.3670549"},"language":"en","primary_location":{"id":"doi:10.1145/3626203.3670549","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626203.3670549","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing 2024: Human Powered Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013189498","display_name":"Chen-Chun Chen","orcid":"https://orcid.org/0000-0002-7471-7552"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chen-Chun Chen","raw_affiliation_strings":["Computer Science and Engineering, The Ohio State University, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071358621","display_name":"Goutham Kalikrishna Reddy Kuncham","orcid":"https://orcid.org/0000-0003-2112-4769"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goutham Kalikrishna Reddy Kuncham","raw_affiliation_strings":["Computer Science and Engineering, The Ohio State University, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057608531","display_name":"Pouya Kousha","orcid":"https://orcid.org/0009-0004-7507-0940"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pouya Kousha","raw_affiliation_strings":["Computer Science and Engineering, The Ohio State University, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034293705","display_name":"Hari Subramoni","orcid":"https://orcid.org/0000-0002-1200-2754"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hari Subramoni","raw_affiliation_strings":["Computer Science and Engineering, The Ohio State University, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["Computer Science and Engineering, The Ohio State University, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013189498"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":1.0396,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74760628,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8362894058227539},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7033919095993042},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.564673125743866},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.49444007873535156}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8362894058227539},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7033919095993042},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.564673125743866},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.49444007873535156}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626203.3670549","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626203.3670549","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing 2024: Human Powered Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1637731592","https://openalex.org/W1964981582","https://openalex.org/W2025388037","https://openalex.org/W2070940137","https://openalex.org/W2991848348","https://openalex.org/W2992165038","https://openalex.org/W2998114460","https://openalex.org/W3036255981","https://openalex.org/W3155528695","https://openalex.org/W3166649773","https://openalex.org/W3203693205","https://openalex.org/W4200480415","https://openalex.org/W4289827868","https://openalex.org/W4383749601"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"With":[0],"the":[1,54,152,159,165,221,227],"rising":[2],"demand":[3],"for":[4,83,99,119,190,220],"computing":[5],"power":[6],"in":[7,19],"High-Performance":[8],"Computing":[9],"and":[10,33,37,96,141,161,181],"Deep":[11],"Learning":[12],"applications,":[13],"there":[14],"is":[15,72],"a":[16,116,185,199,217],"noticeable":[17],"trend":[18],"outfitting":[20],"modern":[21,66],"exascale":[22],"clusters":[23],"with":[24,173],"accelerators.":[25],"In":[26,90,176,208],"recent":[27],"years,":[28],"Intel":[29,49,84,107,166,170],"has":[30],"been":[31],"designing":[32],"developing":[34],"GPU":[35,108],"products":[36],"their":[38,46],"associated":[39],"ecosystems.":[40],"Concurrently,":[41],"application":[42,162,223],"developers":[43],"are":[44],"transitioning":[45],"programs":[47],"to":[48,52,74,86,146,204,216,226],"GPUs,":[50],"seeking":[51],"maximize":[53],"computational":[55],"capabilities":[56],"of":[57,154],"multi-GPU":[58],"systems":[59],"by":[60,65],"utilizing":[61,168],"efficient":[62,77],"communication":[63,88],"facilitated":[64],"GPU-aware":[67],"MPI":[68,78,103],"libraries.":[69],"Hence,":[70],"it":[71],"critical":[73],"design":[75,118],"an":[76],"collective":[79,102],"library":[80],"specifically":[81],"tailored":[82],"GPUs":[85,171],"optimize":[87],"performance.":[89,149],"this":[91],"paper,":[92],"we":[93,114,136],"proposed":[94,212],"hybrid":[95],"IPC-based":[97],"designs":[98,156,213],"data":[100,120],"movement":[101,121],"operations":[104,195],"on":[105,126,164],"contemporary":[106],"systems.":[109],"For":[110,133],"large":[111,191],"message":[112],"communication,":[113],"developed":[115],"comprehensive":[117],"collectives":[122],"that":[123],"surpasses":[124],"reliance":[125],"basic":[127],"send/recv":[128],"pairs,":[129],"effectively":[130],"minimizing":[131],"overheads.":[132],"small":[134],"messages,":[135,192],"employ":[137],"CPU":[138],"staging":[139],"techniques":[140],"compare":[142],"various":[143],"underlying":[144],"libraries":[145],"ensure":[147],"optimal":[148],"We":[150],"evaluate":[151],"benefits":[153],"our":[155,179,211],"at":[157,206],"both":[158],"benchmark":[160],"layers":[163],"DevCloud,":[167],"4":[169],"connected":[172],"Xe":[174],"Links.":[175],"benchmark-level":[177],"evaluations,":[178,210],"Alltoall":[180],"Allgather":[182],"implementations":[183],"show":[184],"constant":[186],"100":[187],"\u00b5s":[188],"improvement":[189,219],"while":[193],"other":[194],"like":[196],"Bcast":[197],"achieve":[198],"72x":[200],"performance":[201],"enhancement":[202],"compared":[203,225],"MPICH":[205],"32MB.":[207],"application-level":[209],"demonstrate":[214],"up":[215],"30%":[218],"HPC":[222],"heFFTe":[224],"second-best":[228],"solution":[229],"using":[230],"MPICH.":[231]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
