{"id":"https://openalex.org/W4413145469","doi":"https://doi.org/10.1109/ipdpsw66978.2025.00017","title":"nbshmem: Enabling GPU-Initiated Multi-GPU Communication in Python","display_name":"nbshmem: Enabling GPU-Initiated Multi-GPU Communication in Python","publication_year":2025,"publication_date":"2025-06-03","ids":{"openalex":"https://openalex.org/W4413145469","doi":"https://doi.org/10.1109/ipdpsw66978.2025.00017"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw66978.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw66978.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119281932","display_name":"Calvin Bombis","orcid":null},"institutions":[{"id":"https://openalex.org/I120691247","display_name":"University of Hagen","ror":"https://ror.org/04tkkr536","country_code":"DE","type":"education","lineage":["https://openalex.org/I120691247"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Calvin Bombis","raw_affiliation_strings":["University of Hagen,Computer Engineering,Hagen,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hagen,Computer Engineering,Hagen,Germany","institution_ids":["https://openalex.org/I120691247"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011121841","display_name":"Lena Oden","orcid":"https://orcid.org/0000-0002-9670-5296"},"institutions":[{"id":"https://openalex.org/I120691247","display_name":"University of Hagen","ror":"https://ror.org/04tkkr536","country_code":"DE","type":"education","lineage":["https://openalex.org/I120691247"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lena Oden","raw_affiliation_strings":["University of Hagen,Computer Engineering,Hagen,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hagen,Computer Engineering,Hagen,Germany","institution_ids":["https://openalex.org/I120691247"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5119281932"],"corresponding_institution_ids":["https://openalex.org/I120691247"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11591005,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"68","last_page":"77"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9146000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9146000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.820793867111206},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.7674102783203125},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5546389222145081},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5250290036201477},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3630495071411133},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.25347426533699036},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.16613394021987915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.820793867111206},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.7674102783203125},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5546389222145081},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5250290036201477},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3630495071411133},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25347426533699036},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.16613394021987915}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdpsw66978.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw66978.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2341492732","https://openalex.org/W3187193180","https://openalex.org/W106542691","https://openalex.org/W1699080303","https://openalex.org/W4297799326","https://openalex.org/W3116064965","https://openalex.org/W1980160788"],"abstract_inverted_index":{"In":[0],"this":[1,110],"paper":[2,103],"we":[3,117,146],"present":[4],"nbshmem,":[5],"a":[6,68,113,148,188],"Python":[7,57,78,194],"library":[8,14],"for":[9,98,140,173,206],"GPU-initiated":[10],"GPU-to-GPU":[11],"communication.":[12],"The":[13],"can":[15,165],"be":[16],"used":[17],"within":[18],"Numba":[19],"CUDA":[20],"kernels":[21],"that":[22,56,131,152,163],"are":[23,96],"compiled":[24],"into":[25],"GPU":[26,195,199],"device":[27],"code":[28],"at":[29],"runtime.":[30],"nbshmem":[31,215],"is":[32,55,87],"designed":[33],"with":[34],"NVSHMEM":[35],"in":[36,93,124,193],"mind,":[37],"but":[38],"its":[39],"implementation":[40,216],"poses":[41],"several":[42,120],"challenges":[43],"due":[44],"to":[45,50,66,80,109,220],"Python\u2019s":[46],"language":[47],"constraints":[48],"compared":[49],"C.":[51],"One":[52],"major":[53,189],"challenge":[54,86],"does":[58],"not":[59],"support":[60],"pointer":[61],"arithmetic,":[62],"making":[63],"it":[64],"difficult":[65],"realise":[67],"symmetric":[69],"address":[70],"space.":[71],"To":[72,143],"overcome":[73],"this,":[74],"our":[75,214],"approach":[76],"uses":[77],"tuples":[79],"manage":[81],"shared":[82],"memory":[83],"objects.":[84],"Another":[85],"the":[88],"lack":[89],"of":[90,115,132,191],"volatile":[91],"operations":[92],"Python,":[94],"which":[95],"required":[97],"certain":[99],"synchronisation":[100],"mechanisms.":[101],"This":[102],"presents":[104],"and":[105,136,176,185],"evaluates":[106],"different":[107],"solutions":[108],"problem.":[111],"As":[112],"proof":[114],"concept,":[116],"have":[118],"implemented":[119,147],"collective":[121],"operations.":[122],"However,":[123],"most":[125],"cases":[126],"their":[127],"performance":[128,167,218],"remains":[129,201],"below":[130],"NCCL":[133],"(in":[134],"Python)":[135],"NVSHMEM,":[137,204],"highlighting":[138],"areas":[139],"potential":[141],"improvement.":[142],"evaluate":[144],"performance,":[145],"stencil":[149,212],"computation":[150],"kernel":[151,183],"requires":[153],"frequent":[154,182],"data":[155],"exchange":[156],"between":[157],"neighbouring":[158],"GPUs.":[159],"Our":[160],"results":[161],"show":[162],"<nbshmem":[164],"provide":[166],"advantages":[168],"over":[169],"existing":[170],"Python-based":[171],"approaches":[172],"both":[174],"small":[175,207],"large":[177],"problem":[178,208],"sizes":[179],"by":[180],"avoiding":[181],"starts":[184],"device/CPU":[186],"synchronisation,":[187],"source":[190],"overhead":[192],"applications.":[196],"Nevertheless,":[197],"Pythonbased":[198],"communication":[200],"slower":[202],"than":[203],"especially":[205],"sizes.":[209],"For":[210],"larger":[211],"domains,":[213],"achieves":[217],"comparable":[219],"NVSHMEM.":[221]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
