{"id":"https://openalex.org/W4413411914","doi":"https://doi.org/10.1145/3721145.3725773","title":"MAGNUS: Generating Data Locality to Accelerate Sparse Matrix-Matrix Multiplication on CPUs","display_name":"MAGNUS: Generating Data Locality to Accelerate Sparse Matrix-Matrix Multiplication on CPUs","publication_year":2025,"publication_date":"2025-06-08","ids":{"openalex":"https://openalex.org/W4413411914","doi":"https://doi.org/10.1145/3721145.3725773"},"language":"en","primary_location":{"id":"doi:10.1145/3721145.3725773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721145.3725773","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3721145.3725773","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3721145.3725773","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078336285","display_name":"Jordi Wolfson-Pou","orcid":"https://orcid.org/0009-0003-2952-2996"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jordi Wolfson-Pou","raw_affiliation_strings":["Intel Labs, Santa Clara, USA"],"affiliations":[{"raw_affiliation_string":"Intel Labs, Santa Clara, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031307529","display_name":"Jan Laukemann","orcid":"https://orcid.org/0000-0002-3776-9353"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Laukemann","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066195314","display_name":"Fabrizio Petrini","orcid":"https://orcid.org/0000-0002-4977-7107"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabrizio Petrini","raw_affiliation_strings":["Intel Labs, Santa Clara, USA"],"affiliations":[{"raw_affiliation_string":"Intel Labs, Santa Clara, USA","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5078336285"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":2.5178,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89966598,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"442","last_page":"457"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7893179059028625},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.755057156085968},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6641356348991394},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6501901149749756},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5793943405151367},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5775926113128662},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.5462573766708374},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20652738213539124},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.12224629521369934},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07425644993782043}],"concepts":[{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7893179059028625},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.755057156085968},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6641356348991394},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6501901149749756},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5793943405151367},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5775926113128662},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.5462573766708374},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20652738213539124},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.12224629521369934},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07425644993782043},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3721145.3725773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721145.3725773","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3721145.3725773","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3721145.3725773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721145.3725773","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3721145.3725773","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413411914.pdf","grobid_xml":"https://content.openalex.org/works/W4413411914.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W784772322","https://openalex.org/W1482680420","https://openalex.org/W1515144947","https://openalex.org/W1665157575","https://openalex.org/W1961194538","https://openalex.org/W1980282429","https://openalex.org/W2010370862","https://openalex.org/W2035080386","https://openalex.org/W2060064441","https://openalex.org/W2089437293","https://openalex.org/W2095249664","https://openalex.org/W2168931017","https://openalex.org/W2235832317","https://openalex.org/W2410461480","https://openalex.org/W2544624872","https://openalex.org/W2741951152","https://openalex.org/W2751658790","https://openalex.org/W2766575514","https://openalex.org/W2781869913","https://openalex.org/W2908198074","https://openalex.org/W2914086833","https://openalex.org/W2962729196","https://openalex.org/W2964336816","https://openalex.org/W2970435804","https://openalex.org/W2995872593","https://openalex.org/W2997005340","https://openalex.org/W2998804949","https://openalex.org/W3000305333","https://openalex.org/W3016542674","https://openalex.org/W3041191963","https://openalex.org/W3043796721","https://openalex.org/W3100632412","https://openalex.org/W3105937213","https://openalex.org/W3116183289","https://openalex.org/W3155922894","https://openalex.org/W3174083114","https://openalex.org/W3184168441","https://openalex.org/W3216997764","https://openalex.org/W4207047521","https://openalex.org/W4283033672","https://openalex.org/W4293084046","https://openalex.org/W4309620166","https://openalex.org/W4312512934","https://openalex.org/W4385623132","https://openalex.org/W4386709563","https://openalex.org/W4388581352"],"related_works":["https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W752783541","https://openalex.org/W1506547947","https://openalex.org/W2995605830","https://openalex.org/W4206811032","https://openalex.org/W2086123442","https://openalex.org/W1925544630","https://openalex.org/W2596457687","https://openalex.org/W3212757063"],"abstract_inverted_index":{"Sparse":[0],"general":[1],"matrix-matrix":[2],"multiplication":[3],"(SpGEMM)":[4],"is":[5,68,77,83,85,102,150,174,184,212],"a":[6,42,63,116,125,159],"critical":[7],"operation":[8],"in":[9,49,120,180],"many":[10],"applications.Current":[11],"multithreaded":[12],"implementations":[13],"are":[14],"based":[15,88,128],"on":[16,23,89,129,164],"Gustavson's":[17],"algorithm":[18,44,141],"and":[19,93,142,183,227],"often":[20,185],"perform":[21],"poorly":[22],"large":[24],"matrices":[25,163,168],"due":[26],"to":[27,45,70,201,214],"limited":[28],"cache":[29],"reuse":[30],"by":[31,104],"the":[32,55,74,81,90,97,106,110,170,178,202,207,215,223],"accumulators.We":[33],"present":[34],"MAGNUS":[35,53,149,173,199,211],"(Matrix":[36],"Algebra":[37],"for":[38,115,158,218],"Gigantic":[39],"NUmerical":[40],"Systems),":[41],"novel":[43],"maximize":[46],"data":[47,112],"locality":[48],"SpGEMM.To":[50],"generate":[51],"locality,":[52],"reorders":[54],"intermediate":[56],"product":[57],"into":[58],"discrete":[59],"cache-friendly":[60],"chunks":[61,101],"using":[62],"two-level":[64],"hierarchical":[65],"approach.The":[66],"accumulator":[67,82,127],"applied":[69],"each":[71,122],"chunk,":[72],"where":[73],"chunk":[75,123],"size":[76],"chosen":[78],"such":[79],"that":[80],"cacheefficient.MAGNUS":[84],"input-and":[86],"system-aware:":[87],"matrix":[91,204,224],"characteristics":[92],"target":[94],"system":[95],"specifications,":[96],"optimal":[98,216],"number":[99],"of":[100,109,148,161,188,222],"computed":[103],"minimizing":[105],"storage":[107],"cost":[108],"necessary":[111],"structures.MAGNUS":[113],"allows":[114],"hybrid":[117],"accumulation":[118],"strategy":[119],"which":[121],"uses":[124],"different":[126,162],"an":[130,136,186],"input":[131],"threshold.We":[132],"consider":[133],"two":[134],"accumulators:":[135],"AVX-512":[137],"vectorized":[138],"bitonic":[139],"sorting":[140],"classical":[143],"dense":[144],"accumulation.An":[145],"OpenMP":[146],"implementation":[147],"compared":[151],"with":[152],"several":[153],"baselines,":[154],"including":[155],"Intel":[156,166],"MKL,":[157],"variety":[160],"three":[165],"architectures.For":[167],"from":[169],"SuiteSparse":[171],"collection,":[172],"faster":[175,190],"than":[176,191],"all":[177],"baselines":[179,208],"most":[181],"cases":[182],"order":[187],"magnitude":[189],"at":[192],"least":[193],"one":[194],"baseline.For":[195],"massive":[196],"random":[197],"matrices,":[198,220],"scales":[200],"largest":[203],"sizes,":[205],"while":[206],"do":[209],"not.Furthermore,":[210],"close":[213],"bound":[217],"these":[219],"regardless":[221],"size,":[225],"structure,":[226],"density.":[228]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
