{"id":"https://openalex.org/W4412480888","doi":"https://doi.org/10.1145/3694906.3743357","title":"Brief Announcement: Optimality Conditions for Parallel Communication-Avoiding Matrix Multiplication with Overlapped Communication","display_name":"Brief Announcement: Optimality Conditions for Parallel Communication-Avoiding Matrix Multiplication with Overlapped Communication","publication_year":2025,"publication_date":"2025-07-16","ids":{"openalex":"https://openalex.org/W4412480888","doi":"https://doi.org/10.1145/3694906.3743357"},"language":"en","primary_location":{"id":"doi:10.1145/3694906.3743357","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694906.3743357","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694906.3743357","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3694906.3743357","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088588076","display_name":"Mikhail Isaev","orcid":"https://orcid.org/0000-0002-2599-9741"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Mikhail Isaev","raw_affiliation_strings":["NVIDIA, Atlanta, Georgia, USA"],"raw_orcid":"https://orcid.org/0000-0002-2599-9741","affiliations":[{"raw_affiliation_string":"NVIDIA, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I4210127875","https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076067612","display_name":"Srinivas Eswar","orcid":"https://orcid.org/0000-0002-3418-7796"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srinivas Eswar","raw_affiliation_strings":["Argonne National Laboratory, Lemont, Illinois, USA"],"raw_orcid":"https://orcid.org/0000-0002-3418-7796","affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, Lemont, Illinois, USA","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016738770","display_name":"Richard Vuduc","orcid":"https://orcid.org/0000-0003-2178-138X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Vuduc","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia, USA"],"raw_orcid":"https://orcid.org/0000-0003-2178-138X","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088588076"],"corresponding_institution_ids":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1821531,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"582","last_page":"586"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7152391076087952},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.6514040231704712},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5843647718429565},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.46687716245651245},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.43674683570861816},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.35211867094039917},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16790729761123657},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.054838478565216064}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7152391076087952},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.6514040231704712},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5843647718429565},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46687716245651245},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.43674683570861816},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.35211867094039917},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16790729761123657},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.054838478565216064},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3694906.3743357","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694906.3743357","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694906.3743357","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3694906.3743357","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694906.3743357","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694906.3743357","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1313983767","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320338284","funder_display_name":"Argonne National Laboratory"},{"id":"https://openalex.org/G1751644051","display_name":null,"funder_award_id":"DE-AC02-06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G2205765577","display_name":null,"funder_award_id":"2316176","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2777053550","display_name":null,"funder_award_id":"AC02-06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3075337988","display_name":null,"funder_award_id":"06CH11357","funder_id":"https://openalex.org/F4320338284","funder_display_name":"Argonne National Laboratory"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5085543421","display_name":null,"funder_award_id":"AC02-06CH11357","funder_id":"https://openalex.org/F4320338284","funder_display_name":"Argonne National Laboratory"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6848031779","display_name":null,"funder_award_id":"06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6918803902","display_name":null,"funder_award_id":"06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G7351994996","display_name":null,"funder_award_id":"DE-AC02-06CH11357","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8143874970","display_name":null,"funder_award_id":"AC02-06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320338284","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412480888.pdf","grobid_xml":"https://content.openalex.org/works/W4412480888.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W201315547","https://openalex.org/W1964711958","https://openalex.org/W2010747199","https://openalex.org/W2012652661","https://openalex.org/W2023122441","https://openalex.org/W2093529653","https://openalex.org/W2093608182","https://openalex.org/W2168748809","https://openalex.org/W2565436413","https://openalex.org/W2984305089","https://openalex.org/W4250730856","https://openalex.org/W4285006655","https://openalex.org/W4312060029"],"related_works":["https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W2057797376","https://openalex.org/W2090319426","https://openalex.org/W752783541","https://openalex.org/W2036954759","https://openalex.org/W2506252583","https://openalex.org/W1506547947","https://openalex.org/W4406620725","https://openalex.org/W2048249848"],"abstract_inverted_index":{"When":[0],"considering":[1],"general":[2],"matrix":[3,110],"multiply":[4],"(GEMM)":[5],"algorithms":[6,57],"for":[7,91,108,115],"distributed-memory":[8],"systems,":[9],"the":[10,30,40,52,72,81,96,122,130],"dominant":[11],"paradigm":[12],"is":[13,103],"to":[14,19,71,83,127],"minimize":[15,20],"communication":[16,26,35,62,70,85,116],"volume.":[17],"However,":[18],"time,":[21],"one":[22],"must":[23],"consider":[24],"how":[25],"volume":[27,117],"interacts":[28],"with":[29,86,129],"system":[31],"characteristics":[32],"---":[33,58,67],"namely,":[34],"bandwidth,":[36],"memory":[37,66,75],"capacity,":[38],"and":[39,44,118],"co-scheduling":[41],"of":[42,54,74,95,105],"computation":[43],"communication.":[45],"In":[46],"this":[47],"work,":[48],"we":[49],"demonstrate":[50],"that":[51,112],"family":[53],"3D":[55,123],"GEMM":[56,97,124],"although":[59],"reducing":[60],"overall":[61],"by":[63],"leveraging":[64],"extra":[65],"fundamentally":[68],"concentrates":[69],"phase":[73],"filling.":[76],"This":[77],"upfront":[78],"cost":[79],"hinders":[80],"ability":[82],"overlap":[84,119],"computation,":[87],"and,":[88],"consequently,":[89],"calls":[90],"a":[92],"revised":[93],"view":[94],"optimality":[98,106,125],"regions.":[99],"Our":[100],"main":[101],"contribution":[102],"derivation":[104],"conditions":[107],"parallel":[109],"multiplication":[111],"jointly":[113],"accounts":[114],"effects,":[120],"pushing":[121],"region":[126],"start":[128],"systems":[131],"more":[132],"than":[133,136],"11\u00d7":[134],"larger":[135],"previously":[137],"believed.":[138]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2025-10-10T00:00:00"}
