{"id":"https://openalex.org/W4412848226","doi":"https://doi.org/10.1177/10943420251363423","title":"A compilation-based approach to performant reduction and redistribution collective communication algorithms","display_name":"A compilation-based approach to performant reduction and redistribution collective communication algorithms","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4412848226","doi":"https://doi.org/10.1177/10943420251363423"},"language":"en","primary_location":{"id":"doi:10.1177/10943420251363423","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420251363423","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1177/10943420251363423","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047832410","display_name":"A. Jocksch","orcid":"https://orcid.org/0000-0002-3327-4230"},"institutions":[{"id":"https://openalex.org/I4210094294","display_name":"Supercomputing Systems (Switzerland)","ror":"https://ror.org/00nmpgc29","country_code":"CH","type":"company","lineage":["https://openalex.org/I4210094294"]},{"id":"https://openalex.org/I4391767926","display_name":"CSCS - Swiss National Supercomputing Centre","ror":"https://ror.org/04rzmms09","country_code":null,"type":"facility","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088","https://openalex.org/I4391767926"]},{"id":"https://openalex.org/I59105498","display_name":"Swisscom (Switzerland)","ror":"https://ror.org/04t1f4f50","country_code":"CH","type":"company","lineage":["https://openalex.org/I59105498"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Andreas Jocksch","raw_affiliation_strings":["Swiss National Supercomputing Centre","ETH Zurich / CSCS, Swiss National Supercomputing Centre, Switzerland"],"affiliations":[{"raw_affiliation_string":"Swiss National Supercomputing Centre","institution_ids":["https://openalex.org/I59105498","https://openalex.org/I4391767926"]},{"raw_affiliation_string":"ETH Zurich / CSCS, Swiss National Supercomputing Centre, Switzerland","institution_ids":["https://openalex.org/I59105498","https://openalex.org/I4210094294"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114538340","display_name":"C. Nicole Avans","orcid":null},"institutions":[{"id":"https://openalex.org/I63920570","display_name":"Tennessee Technological University","ror":"https://ror.org/05drmrq39","country_code":"US","type":"education","lineage":["https://openalex.org/I63920570"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Nicole Avans","raw_affiliation_strings":["Tennessee Technological University","Tennessee Technological University, USA"],"affiliations":[{"raw_affiliation_string":"Tennessee Technological University","institution_ids":["https://openalex.org/I63920570"]},{"raw_affiliation_string":"Tennessee Technological University, USA","institution_ids":["https://openalex.org/I63920570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093967949","display_name":"Riley Shipley","orcid":"https://orcid.org/0000-0002-7486-7542"},"institutions":[{"id":"https://openalex.org/I63920570","display_name":"Tennessee Technological University","ror":"https://ror.org/05drmrq39","country_code":"US","type":"education","lineage":["https://openalex.org/I63920570"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Riley Shipley","raw_affiliation_strings":["Tennessee Technological University","Tennessee Technological University, USA"],"affiliations":[{"raw_affiliation_string":"Tennessee Technological University","institution_ids":["https://openalex.org/I63920570"]},{"raw_affiliation_string":"Tennessee Technological University, USA","institution_ids":["https://openalex.org/I63920570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026440046","display_name":"Anthony Skjellum","orcid":"https://orcid.org/0000-0001-5252-6600"},"institutions":[{"id":"https://openalex.org/I63920570","display_name":"Tennessee Technological University","ror":"https://ror.org/05drmrq39","country_code":"US","type":"education","lineage":["https://openalex.org/I63920570"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anthony Skjellum","raw_affiliation_strings":["Tennessee Technological University","Tennessee Technological University, USA"],"affiliations":[{"raw_affiliation_string":"Tennessee Technological University","institution_ids":["https://openalex.org/I63920570"]},{"raw_affiliation_string":"Tennessee Technological University, USA","institution_ids":["https://openalex.org/I63920570"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047832410"],"corresponding_institution_ids":["https://openalex.org/I4210094294","https://openalex.org/I4391767926","https://openalex.org/I59105498"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20153895,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"2","first_page":"219","last_page":"239"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6917009353637695},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.6027694344520569},{"id":"https://openalex.org/keywords/redistribution","display_name":"Redistribution (election)","score":0.5934569239616394},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.428326815366745},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.1378641426563263},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12941619753837585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6917009353637695},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.6027694344520569},{"id":"https://openalex.org/C74080474","wikidata":"https://www.wikidata.org/wiki/Q7305975","display_name":"Redistribution (election)","level":3,"score":0.5934569239616394},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.428326815366745},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.1378641426563263},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12941619753837585},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1177/10943420251363423","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420251363423","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:doi:10.3929/ethz-c-000797051","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11850/797051","pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1177/10943420251363423","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420251363423","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1556574299","https://openalex.org/W1573548168","https://openalex.org/W1825216778","https://openalex.org/W1987274898","https://openalex.org/W2045466102","https://openalex.org/W2056476515","https://openalex.org/W2109008148","https://openalex.org/W2116621495","https://openalex.org/W2118646353","https://openalex.org/W2123193689","https://openalex.org/W2131613942","https://openalex.org/W2443267493","https://openalex.org/W2748784321","https://openalex.org/W2886520239","https://openalex.org/W2914452456","https://openalex.org/W2942929621","https://openalex.org/W3027308092","https://openalex.org/W3096262771","https://openalex.org/W3187487531","https://openalex.org/W3200157745","https://openalex.org/W3206778761","https://openalex.org/W3211055848","https://openalex.org/W4200400880","https://openalex.org/W4205715945","https://openalex.org/W4221008227","https://openalex.org/W4283815349","https://openalex.org/W4385723308","https://openalex.org/W4388101525","https://openalex.org/W4388855519","https://openalex.org/W4388855615","https://openalex.org/W4396790354","https://openalex.org/W4402698578","https://openalex.org/W4407785164","https://openalex.org/W4409067491"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2080866340","https://openalex.org/W2390279801","https://openalex.org/W2519031001","https://openalex.org/W2002725676","https://openalex.org/W2352590024"],"abstract_inverted_index":{"Modern":[0],"supercomputers":[1],"feature":[2],"an":[3,50,116,198,225],"ever-increasing":[4],"degree":[5],"of":[6,12,26,44,104,129,196,200,210,227,261],"parallelism,":[7],"particularly":[8],"in":[9,22,35],"the":[10,93,102,136,144,149,178,182,204,262,277],"number":[11],"cores":[13],"per":[14],"node.":[15],"These":[16],"high":[17,39],"core":[18],"counts":[19],"are":[20,79,87,90,247],"considered":[21,81],"our":[23,41,107,168,274],"flexible":[24],"implementation":[25,108,250,275],"allreduce,":[27],"which":[28,164],"was":[29],"implemented":[30],"specifically":[31],"with":[32,167,219,233],"shared-memory":[33],"communication":[34],"mind.":[36],"At":[37],"a":[38,45,54,59,112,126,156,174,185,216,230],"level,":[40],"algorithm":[42,94,137,187],"consists":[43],"reduce_scatter":[46],"stage":[47,56],"followed":[48,57],"by":[49,58,268],"allgather":[51],"stage,":[52,61],"or":[53],"reduce":[55,75],"broadcast":[60,77],"and":[62,76,115,131,170,173,206,212,223,244],"allows":[63],"for":[64,125,134,271],"different":[65],"factors":[66],"(aka":[67],"multi-radix)":[68],"to":[69,97,203],"be":[70,140],"applied":[71],"at":[72],"each.":[73],"The":[74,119],"operations":[78],"also":[80,251],"as":[82,280],"standalone":[83],"functions.":[84],"Where":[85],"barriers":[86],"required,":[88],"they":[89],"integrated":[91],"into":[92,111],"using":[95],"counters":[96],"track":[98],"progress.":[99],"To":[100],"accommodate":[101],"complexity":[103],"this":[105],"approach,":[106],"is":[109,132,146,180,188,265],"split":[110],"setup":[113,120,169,179],"phase":[114,121],"execution":[117,150,171],"phase.":[118,151],"occurs":[122],"only":[123],"once":[124],"given":[127],"set":[128],"parameters,":[130],"responsible":[133],"determining":[135],"that":[138],"will":[139],"run":[141],"each":[142],"time":[143,184],"allreduce":[145,208],"called":[147],"within":[148],"We":[152],"present":[153],"two":[154],"interfaces:":[155],"persistent":[157,205],"collective":[158],"interface":[159],"(an":[160],"MPI":[161,214],"4.0":[162],"feature),":[163],"inherently":[165],"aligns":[166],"phases,":[172],"blocking":[175,207],"interface,":[176],"where":[177],"performed":[181],"first":[183],"specific":[186],"required.":[189],"Using":[190],"these":[191],"methods,":[192],"we":[193],"achieve":[194],"speedups":[195],"half":[197],"order":[199,226],"magnitude":[201,228],"compared":[202],"implementations":[209],"MPICH":[211],"Open":[213],"on":[215,229,238,241,255],"dual-socket":[217],"node":[218,232],"AMD":[220],"EPYC":[221],"processors,":[222],"almost":[224],"four-socket":[231],"NVIDIA":[234],"Grace-Hopper":[235],"processors.":[236],"Reductions":[237],"vectors":[239],"residing":[240],"both":[242],"CPU":[243],"GPU":[245],"memory":[246],"performed.":[248],"Our":[249],"achieves":[252,276],"good":[253],"performance":[254,279],"multiple":[256],"nodes.":[257],"A":[258],"standard":[259],"benchmark":[260],"application":[263],"CP2K":[264],"sped":[266],"up":[267],"2.5%.":[269],"Notably,":[270],"long":[272],"messages,":[273],"same":[278],"NCCL.":[281]},"counts_by_year":[],"updated_date":"2026-03-13T14:20:09.374765","created_date":"2025-10-10T00:00:00"}
