{"id":"https://openalex.org/W4388664805","doi":"https://doi.org/10.1145/3581784.3607092","title":"Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters","display_name":"Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters","publication_year":2023,"publication_date":"2023-11-11","ids":{"openalex":"https://openalex.org/W4388664805","doi":"https://doi.org/10.1145/3581784.3607092"},"language":"en","primary_location":{"id":"doi:10.1145/3581784.3607092","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607092","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607092","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607092","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038051093","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0003-3750-1178"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-3750-1178","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, United States of America","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011195132","display_name":"Nan Ding","orcid":"https://orcid.org/0000-0001-9624-9449"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nan Ding","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-9624-9449","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, United States of America","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048170299","display_name":"Piyush Sao","orcid":"https://orcid.org/0000-0002-9432-5855"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Piyush Sao","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-9432-5855","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, United States of America","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102746800","display_name":"Samuel Williams","orcid":"https://orcid.org/0000-0002-8327-5717"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Williams","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-8327-5717","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, United States of America","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006873445","display_name":"Xiaoye Sherry Li","orcid":"https://orcid.org/0000-0002-0747-698X"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoye Sherry Li","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-0747-698X","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, United States of America","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.995,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86430634,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8279765248298645},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.71587073802948},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.6048573851585388},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5489099025726318},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5188702344894409},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.47105643153190613},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.45134541392326355},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4248582124710083},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3541979193687439},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0774371325969696},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.07185912132263184}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8279765248298645},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.71587073802948},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.6048573851585388},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5489099025726318},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5188702344894409},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.47105643153190613},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.45134541392326355},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4248582124710083},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3541979193687439},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0774371325969696},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.07185912132263184},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3581784.3607092","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607092","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607092","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt92r7t7rv","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/92r7t7rv","pdf_url":"https://escholarship.org/content/qt92r7t7rv/qt92r7t7rv.pdf","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:osti.gov:2438981","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/2438981","pdf_url":"https://www.osti.gov/servlets/purl/2438981","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"pmh:ark:/13030/qt92r7t7rv","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1145/3581784.3607092","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581784.3607092","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581784.3607092","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1071216263","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332369","funder_display_name":"National Nuclear Security Administration"},{"id":"https://openalex.org/G1286236842","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1489425746","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G1645119126","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1677143136","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G1799333409","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320337506","funder_display_name":"Advanced Scientific Computing Research"},{"id":"https://openalex.org/G2900453181","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332369","funder_display_name":"National Nuclear Security Administration"},{"id":"https://openalex.org/G3083819904","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3232922872","display_name":null,"funder_award_id":"17-SC-20-SC","funder_id":"https://openalex.org/F4320332369","funder_display_name":"National Nuclear Security Administration"},{"id":"https://openalex.org/G3976335900","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320338292","funder_display_name":"Lawrence Berkeley National Laboratory"},{"id":"https://openalex.org/G4327825534","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320332369","funder_display_name":"National Nuclear Security Administration"},{"id":"https://openalex.org/G4501827968","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4565140552","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5076365615","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G6348972864","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6859237865","display_name":null,"funder_award_id":"17-SC-20-SC","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7354628648","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320338292","funder_display_name":"Lawrence Berkeley National Laboratory"},{"id":"https://openalex.org/G7368046788","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7651947074","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G7995982022","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8052219770","display_name":null,"funder_award_id":"17-SC-20-SC","funder_id":"https://openalex.org/F4320337506","funder_display_name":"Advanced Scientific Computing Research"},{"id":"https://openalex.org/G8253304997","display_name":null,"funder_award_id":"17-SC-20-SC","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G8269158468","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G8306346728","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320338292","funder_display_name":"Lawrence Berkeley National Laboratory"},{"id":"https://openalex.org/G8906985441","display_name":null,"funder_award_id":"00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320317220","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320332369","display_name":"National Nuclear Security Administration","ror":"https://ror.org/03sk1we31"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"},{"id":"https://openalex.org/F4320338292","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388664805.pdf","grobid_xml":"https://content.openalex.org/works/W4388664805.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1970009422","https://openalex.org/W2012715759","https://openalex.org/W2035080386","https://openalex.org/W2038205735","https://openalex.org/W2045618500","https://openalex.org/W2055094346","https://openalex.org/W2066166631","https://openalex.org/W2070232376","https://openalex.org/W2108520826","https://openalex.org/W2127009405","https://openalex.org/W2141331848","https://openalex.org/W2145194992","https://openalex.org/W2169150754","https://openalex.org/W2585804629","https://openalex.org/W2741907101","https://openalex.org/W2754559815","https://openalex.org/W2789228469","https://openalex.org/W2805150752","https://openalex.org/W2887674375","https://openalex.org/W2915551302","https://openalex.org/W2942021433","https://openalex.org/W2973889589","https://openalex.org/W3001467520","https://openalex.org/W3072519247","https://openalex.org/W3137858876","https://openalex.org/W3187657707","https://openalex.org/W4206624271","https://openalex.org/W4210574274","https://openalex.org/W4298218678"],"related_works":["https://openalex.org/W2186864281","https://openalex.org/W4255427455","https://openalex.org/W1966025497","https://openalex.org/W68941528","https://openalex.org/W4206451355","https://openalex.org/W8322802","https://openalex.org/W314331466","https://openalex.org/W233983175","https://openalex.org/W2355089277","https://openalex.org/W2312486021"],"abstract_inverted_index":{"This":[0,79],"paper":[1],"presents":[2],"a":[3,23,37,46,62],"unified":[4],"communication":[5,58,73,89,101,113,122,126,137],"optimization":[6,59,138],"framework":[7,20],"for":[8],"sparse":[9,38,94],"triangular":[10],"solve":[11],"(SpTRSV)":[12],"algorithms":[13],"on":[14,114,128],"CPU":[15,115,164,184],"and":[16,74,86,99,135],"GPU":[17,129,170,200],"clusters.":[18,116,130],"The":[19],"builds":[21],"upon":[22],"3D":[24,64,142,155,171,185,201],"communication-avoiding":[25],"(CA)":[26],"layout":[27],"of":[28,109],"Px":[29,47],"\u00d7":[30,32,48],"Py":[31,49],"Pz":[33,41,189],"processes":[34],"that":[35,197],"divides":[36],"matrix":[39],"into":[40],"submatrices,":[42],"each":[43],"handled":[44],"by":[45],"2D":[50,112,215],"grid":[51],"with":[52,76,93,188],"block-cyclic":[53],"distribution.":[54],"We":[55],"propose":[56],"three":[57],"strategies:":[60],"First,":[61],"new":[63],"SpTRSV":[65,143,156,172,186,202,216],"algorithm":[66,144,157,173,187,217],"is":[67,90,195],"developed,":[68],"which":[69],"trades":[70],"the":[71,87,110,125,140,153,168,182,198,209,213],"inter-grid":[72,84,88],"synchronization":[75],"replicated":[77],"computation.":[78],"design":[80],"requires":[81],"only":[82,219],"one":[83],"synchronization,":[85],"efficiently":[91],"implemented":[92],"allreduce":[95],"operations.":[96],"Second,":[97],"broadcast":[98],"reduction":[100],"trees":[102,127],"are":[103],"used":[104],"to":[105,123,148,152,160,177,181,191,205,222],"reduce":[106],"message":[107],"latency":[108],"intra-grid":[111,136],"Finally,":[117],"we":[118],"leverage":[119],"GPU-initiated":[120],"one-sided":[121],"implement":[124],"With":[131],"these":[132],"nested":[133],"inter-":[134],"strategies,":[139],"proposed":[141,169,183,199],"can":[145,174,203,218],"attain":[146],"up":[147,159,176,190,221],"3.45x":[149],"speedups":[150,179],"compared":[151,180],"baseline":[154],"using":[158,208],"2048":[161],"Cori":[162],"Haswell":[163],"cores.":[165],"In":[166],"addition,":[167],"achieve":[175],"6.5x":[178],"64.":[192],"Finally":[193],"it":[194],"remarkable":[196],"scale":[204,220],"256":[206],"GPUs":[207],"Perlmutter":[210],"system":[211],"while":[212],"existing":[214],"4":[223],"GPUs.":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4}],"updated_date":"2026-06-21T07:57:09.225873","created_date":"2025-10-10T00:00:00"}
