{"id":"https://openalex.org/W4416203904","doi":"https://doi.org/10.1145/3712285.3759808","title":"ODOS-MPI: HPC-Friendly SmartNIC Offloading of Computation/Communication Kernels","display_name":"ODOS-MPI: HPC-Friendly SmartNIC Offloading of Computation/Communication Kernels","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416203904","doi":"https://doi.org/10.1145/3712285.3759808"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759808","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759808","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101458148","display_name":"Muhammad Usman","orcid":"https://orcid.org/0009-0009-6312-8355"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Muhammad Usman","raw_affiliation_strings":["Barcelona Supercomputing Center (BSC), Barcelona, Spain"],"raw_orcid":"https://orcid.org/0009-0009-6312-8355","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center (BSC), Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090512584","display_name":"Mariano Benito","orcid":"https://orcid.org/0000-0001-6611-7015"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Mariano Benito","raw_affiliation_strings":["Barcelona Supercomputing Center (BSC), Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0001-6611-7015","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center (BSC), Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006767413","display_name":"Sergio Iserte","orcid":"https://orcid.org/0000-0003-3654-7924"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Sergio Iserte","raw_affiliation_strings":["Barcelona Supercomputing Center (BSC), Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0003-3654-7924","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center (BSC), Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000573036","display_name":"Antonio J. Pe\u00f1a","orcid":"https://orcid.org/0000-0002-3575-4617"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Antonio J. Pe\u00f1a","raw_affiliation_strings":["Barcelona Supercomputing Center (BSC), Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0002-3575-4617","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center (BSC), Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101458148"],"corresponding_institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.36843188,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1006","last_page":"1027"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7511000037193298,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7511000037193298,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.09709999710321426,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.035999998450279236,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computation-offloading","display_name":"Computation offloading","score":0.6769999861717224},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6208999752998352},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6139000058174133},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5670999884605408},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.507099986076355},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.4399000108242035},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.41130000352859497},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.37860000133514404}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8636000156402588},{"id":"https://openalex.org/C2781041963","wikidata":"https://www.wikidata.org/wiki/Q18348618","display_name":"Computation offloading","level":4,"score":0.6769999861717224},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6208999752998352},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6169000267982483},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6139000058174133},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5670999884605408},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.4399000108242035},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.40290001034736633},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.35109999775886536},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.27950000762939453},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C150495011","wikidata":"https://www.wikidata.org/wiki/Q128392","display_name":"Concurrent computing","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759808","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759808","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2068020713","https://openalex.org/W3046407074","https://openalex.org/W3165940308","https://openalex.org/W4200392450","https://openalex.org/W4206101395","https://openalex.org/W4221092982","https://openalex.org/W4294672401","https://openalex.org/W4296338881","https://openalex.org/W4321782160","https://openalex.org/W4384705383","https://openalex.org/W4388581255","https://openalex.org/W4393575823","https://openalex.org/W4396814849","https://openalex.org/W4402622900","https://openalex.org/W4405756373","https://openalex.org/W4407784357","https://openalex.org/W4407784448"],"related_works":[],"abstract_inverted_index":{"The":[0],"increasing":[1],"complexity":[2],"and":[3,17,28,108,125],"scale":[4],"of":[5,103,113],"high-performance":[6],"computing":[7],"(HPC)":[8],"workloads":[9],"demand":[10],"innovative":[11],"approaches":[12],"to":[13,48,53,122],"optimize":[14],"both":[15],"computation":[16,124],"communication.":[18,126],"While":[19],"OpenMP":[20,47,78],"has":[21],"been":[22],"widely":[23],"adopted":[24],"for":[25,30,38,70,82,90],"intra-node":[26],"parallelism":[27],"MPI":[29,50,59,105],"inter-node":[31],"communication,":[32],"emerging":[33],"SmartNICs":[34],"introduce":[35],"new":[36],"opportunities":[37],"offloading":[39,52,61],"communication-intensive":[40],"tasks.":[41],"In":[42],"this":[43],"work,":[44],"we":[45],"extend":[46],"support":[49],"kernel":[51],"SmartNICs.":[54],"Our":[55],"implementation":[56],"integrates":[57],"Open":[58],"communication":[60],"into":[62],"the":[63,80,87,104,110],"LLVM":[64],"compiler":[65],"while":[66],"utilizing":[67],"DOCA":[68],"SDK":[69],"efficient":[71],"interaction":[72],"with":[73],"Nvidia":[74],"BlueField":[75],"DPUs.":[76],"Leveraging":[77],"eliminates":[79],"need":[81],"direct":[83],"low-level":[84],"programming,":[85],"lowering":[86],"entry":[88],"barrier":[89],"domain":[91],"scientists.":[92],"We":[93],"demonstrate":[94],"our":[95],"framework\u2019s":[96],"versatility":[97],"by":[98,118],"implementing":[99],"a":[100],"SmartNIC-enabled":[101],"version":[102],"OSU":[106],"micro-benchmarks":[107],"improving":[109],"execution":[111],"time":[112],"an":[114],"atmospheric":[115],"weather":[116],"simulation":[117],"over":[119],"18%,":[120],"thanks":[121],"concurrent":[123]},"counts_by_year":[],"updated_date":"2025-11-28T07:45:33.341878","created_date":"2025-11-12T00:00:00"}
