{"id":"https://openalex.org/W4200246580","doi":"https://doi.org/10.1109/mm.2021.3139092","title":"Accelerating Allreduce With In-Network Reduction on Intel PIUMA","display_name":"Accelerating Allreduce With In-Network Reduction on Intel PIUMA","publication_year":2021,"publication_date":"2021-12-30","ids":{"openalex":"https://openalex.org/W4200246580","doi":"https://doi.org/10.1109/mm.2021.3139092"},"language":"en","primary_location":{"id":"doi:10.1109/mm.2021.3139092","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2021.3139092","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027048290","display_name":"Kartik Lakhotia","orcid":"https://orcid.org/0000-0002-9414-8481"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kartik Lakhotia","raw_affiliation_strings":["Intel Labs, Santa Clara, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-9414-8481","affiliations":[{"raw_affiliation_string":"Intel Labs, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066195314","display_name":"Fabrizio Petrini","orcid":"https://orcid.org/0000-0002-4977-7107"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabrizio Petrini","raw_affiliation_strings":["Intel Labs, Santa Clara, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042560222","display_name":"Rajgopal Kannan","orcid":"https://orcid.org/0000-0001-8736-3012"},"institutions":[{"id":"https://openalex.org/I4210088792","display_name":"United States Army","ror":"https://ror.org/00afsp483","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I4210088792"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajgopal Kannan","raw_affiliation_strings":["U.S. Army Research Lab, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0001-8736-3012","affiliations":[{"raw_affiliation_string":"U.S. Army Research Lab, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I4210088792"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033166029","display_name":"Viktor K. Prasanna","orcid":"https://orcid.org/0000-0002-1609-8589"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor Prasanna","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1609-8589","affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4158,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.81531394,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"42","issue":"2","first_page":"44","last_page":"52"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8429995179176331},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.6360960006713867},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4919297695159912},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3878940939903259},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38100701570510864},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3436030149459839}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8429995179176331},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.6360960006713867},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4919297695159912},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3878940939903259},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38100701570510864},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3436030149459839},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mm.2021.3139092","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2021.3139092","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8469920209","display_name":null,"funder_award_id":"HROOll17-3-0004","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1572016165","https://openalex.org/W2057332538","https://openalex.org/W2072910106","https://openalex.org/W2096811399","https://openalex.org/W2405855321","https://openalex.org/W2534577245","https://openalex.org/W2962747323","https://openalex.org/W3037182822","https://openalex.org/W3043522163","https://openalex.org/W3093099168","https://openalex.org/W3202074835","https://openalex.org/W4301239768","https://openalex.org/W6748645090","https://openalex.org/W6783938941"],"related_works":["https://openalex.org/W2418291489","https://openalex.org/W3096519538","https://openalex.org/W2744747300","https://openalex.org/W4241166160","https://openalex.org/W2532234348","https://openalex.org/W2068121105","https://openalex.org/W2384826897","https://openalex.org/W1973516247","https://openalex.org/W1997466117","https://openalex.org/W2795695574"],"abstract_inverted_index":{"The":[0],"Intel":[1],"Programmable":[2],"Integrated":[3],"Unified":[4],"Memory":[5],"Architecture":[6],"(PIUMA)":[7],"system":[8],"maps":[9],"collective":[10,23],"operations":[11],"directly":[12],"into":[13],"the":[14,49,89],"network":[15,30,81],"switches":[16],"and":[17,28,74],"supports":[18],"pipelined":[19],"embeddings":[20,41],"for":[21,42],"high-throughput":[22],"computation.":[24],"Utilizing":[25],"these":[26],"features":[27],"PIUMA\u2019s":[29],"topology,":[31],"we":[32,86],"develop":[33],"a":[34],"methodology":[35],"to":[36,66],"generate":[37],"extremely":[38],"low":[39],"latency":[40,61,73],"in-network":[43,51,93],"Allreduce.":[44],"Our":[45],"analysis":[46],"shows":[47],"that":[48],"proposed":[50],"Allreduce":[52,94],"is":[53],"highly":[54],"scalable,":[55],"with":[56],"less":[57,72],"than":[58],"1.5-\u03bcs":[59],"single-element":[60],"on":[62,95],"16K":[63],"nodes.":[64],"Compared":[65],"host-based":[67],"Allreduce,":[68],"it":[69],"exhibits":[70],"36\u00d7":[71],"3.5\u00d7":[75],"higher":[76],"throughput.":[77],"With":[78],"deep":[79],"neural":[80],"training":[82],"as":[83],"an":[84],"example,":[85],"further":[87],"demonstrate":[88],"benefits":[90],"of":[91],"our":[92],"end-user":[96],"applications.":[97]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
