{"id":"https://openalex.org/W4378801114","doi":"https://doi.org/10.1145/3558481.3591083","title":"Multiplying 2 \u00d7 2 Sub-Blocks Using 4 Multiplications","display_name":"Multiplying 2 \u00d7 2 Sub-Blocks Using 4 Multiplications","publication_year":2023,"publication_date":"2023-05-31","ids":{"openalex":"https://openalex.org/W4378801114","doi":"https://doi.org/10.1145/3558481.3591083"},"language":"en","primary_location":{"id":"doi:10.1145/3558481.3591083","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3558481.3591083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3558481.3591083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3558481.3591083","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092052239","display_name":"Yoav Moran","orcid":"https://orcid.org/0009-0000-5813-347X"},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yoav Moran","raw_affiliation_strings":["The Hebrew University of Jerusalem, Jerusalem, Israel"],"raw_orcid":"https://orcid.org/0009-0000-5813-347X","affiliations":[{"raw_affiliation_string":"The Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010957839","display_name":"Oded Schwartz","orcid":"https://orcid.org/0000-0003-1309-5566"},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Oded Schwartz","raw_affiliation_strings":["The Hebrew University of Jerusalem, Jerusalem, Israel"],"raw_orcid":"https://orcid.org/0000-0003-1309-5566","affiliations":[{"raw_affiliation_string":"The Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.57,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.59535025,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"379","last_page":"390"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.6323679685592651},{"id":"https://openalex.org/keywords/multiplication-algorithm","display_name":"Multiplication algorithm","score":0.6253196597099304},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.592003345489502},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.5158780813217163},{"id":"https://openalex.org/keywords/scalar-multiplication","display_name":"Scalar multiplication","score":0.4991483688354492},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4916699528694153},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4796767234802246},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.47949105501174927},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4485286474227905},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.42584189772605896},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.3743691146373749},{"id":"https://openalex.org/keywords/scalar","display_name":"Scalar (mathematics)","score":0.2755173444747925},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.12053126096725464}],"concepts":[{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.6323679685592651},{"id":"https://openalex.org/C201290732","wikidata":"https://www.wikidata.org/wiki/Q130762","display_name":"Multiplication algorithm","level":3,"score":0.6253196597099304},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.592003345489502},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.5158780813217163},{"id":"https://openalex.org/C171182647","wikidata":"https://www.wikidata.org/wiki/Q126736","display_name":"Scalar multiplication","level":3,"score":0.4991483688354492},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4916699528694153},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4796767234802246},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.47949105501174927},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4485286474227905},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.42584189772605896},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.3743691146373749},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.2755173444747925},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.12053126096725464},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3558481.3591083","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3558481.3591083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3558481.3591083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3558481.3591083","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3558481.3591083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3558481.3591083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8899999856948853,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G4601600730","display_name":"Fault Tolerant High Performance Computing","funder_award_id":"818252","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4378801114.pdf","grobid_xml":"https://content.openalex.org/works/W4378801114.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1520017445","https://openalex.org/W1965900467","https://openalex.org/W1972501001","https://openalex.org/W1977662557","https://openalex.org/W1979740015","https://openalex.org/W1986272312","https://openalex.org/W1993929164","https://openalex.org/W1998927407","https://openalex.org/W2002048434","https://openalex.org/W2003870914","https://openalex.org/W2004948173","https://openalex.org/W2019865138","https://openalex.org/W2020089563","https://openalex.org/W2023724794","https://openalex.org/W2026405063","https://openalex.org/W2032839380","https://openalex.org/W2035476608","https://openalex.org/W2038073775","https://openalex.org/W2055569999","https://openalex.org/W2061851195","https://openalex.org/W2065815779","https://openalex.org/W2073389244","https://openalex.org/W2073948477","https://openalex.org/W2075577372","https://openalex.org/W2111612671","https://openalex.org/W2120248756","https://openalex.org/W2120489629","https://openalex.org/W2152069626","https://openalex.org/W2157323434","https://openalex.org/W2168748809","https://openalex.org/W2473648675","https://openalex.org/W2941605743","https://openalex.org/W2950034300","https://openalex.org/W2953126861","https://openalex.org/W2965436164","https://openalex.org/W2971930414","https://openalex.org/W2984305089","https://openalex.org/W3009870468","https://openalex.org/W3093790569","https://openalex.org/W3100464736","https://openalex.org/W3116253690","https://openalex.org/W3143939112","https://openalex.org/W3190062760","https://openalex.org/W4256483581"],"related_works":["https://openalex.org/W2596457687","https://openalex.org/W2350191890","https://openalex.org/W3099313426","https://openalex.org/W752783541","https://openalex.org/W4287593139","https://openalex.org/W2050885298","https://openalex.org/W1516189266","https://openalex.org/W3134740226","https://openalex.org/W2093666864","https://openalex.org/W4378602953"],"abstract_inverted_index":{"Fast":[0],"parallel":[1,159],"and":[2,77,112,157],"sequential":[3,156],"matrix":[4,82],"multiplication":[5,83,90],"algorithms":[6,70],"switch":[7],"to":[8,137],"the":[9,18,35,50,54,58,117,122,139,143,155,158,163,167,173,177,189,215,234,238],"cubic":[10],"time":[11],"classical":[12,19,36,55,140],"algorithm":[13,20,31,48,56,60,200,212,231,249],"on":[14,24,39,104,221],"small":[15,25,40,62,123],"sub-blocks":[16,124],"as":[17],"requires":[21,91],"fewer":[22],"operations":[23],"blocks.":[26,63],"We":[27,241],"obtain":[28,198,229],"a":[29,87,126,146,243],"new":[30,244],"that":[32,53,71],"can":[33],"outperform":[34],"one,":[37],"even":[38],"blocks,":[41],"by":[42,125,232],"trading":[43],"multiplications":[44],"with":[45,80],"additions.":[46],"This":[47,211],"contradicts":[49,214],"common":[51],"belief":[52],"is":[57,109,260],"fastest":[59],"for":[61,101,182,192,201,250],"To":[64],"this":[65,230],"end,":[66],"we":[67,197,228],"introduce":[68],"commutative":[69],"generalize":[72],"Winograd's":[73],"folding":[74],"technique":[75,115,170,259],"(1968)":[76],"combine":[78],"it":[79],"fast":[81],"algorithms.":[84],"Thus,":[85],"when":[86],"single":[88],"scalar":[89],"\u03c1":[92,108,129,180,190],"times":[93],"more":[94],"clock":[95],"cycles":[96],"than":[97,188],"an":[98,199],"addition":[99],"(e.g.,":[100],"16-bit":[102],"integers":[103],"Intel's":[105],"Skylake":[106],"microarchitecture,":[107],"between":[110],"1.5":[111],"5),":[113],"our":[114,248,258],"reduces":[116,172],"computation":[118],"cost":[119,151,175],"of":[120,128,145,166,176,218,237],"multiplying":[121,202,222],"factor":[127],"+":[130,134],"3":[131],"over":[132],"2(\u03c1":[133],"1)":[135],"compared":[136],"using":[138,207],"algorithm,":[141],"at":[142],"price":[144],"low":[147],"order":[148],"term":[149],"communication":[150],"overhead":[152],"both":[153],"in":[154],"cases,":[160],"thus":[161,256],"reducing":[162],"total":[164],"runtime":[165],"algorithm.":[168,178],"Our":[169],"also":[171],"energy":[174,183],"The":[179],"values":[181,191],"costs":[184],"are":[185],"typically":[186],"larger":[187],"arithmetic":[193],"costs.":[194],"For":[195],"example,":[196],"2":[203,205,223,225,251,253],"x":[204,224,252],"blocks":[206],"only":[208],"four":[209],"multiplications.":[210],"seemingly":[213],"lower":[216,239,245],"bound":[217,246],"Winograd":[219],"(1971)":[220],"matrices.":[226],"However,":[227],"bypassing":[233],"implicit":[235],"assumptions":[236],"bound.":[240],"provide":[242],"matching":[247],"block":[254],"multiplication,":[255],"showing":[257],"optimal.":[261]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}
