{"id":"https://openalex.org/W4417403262","doi":"https://doi.org/10.1109/pact65351.2025.00025","title":"Multiway Merge Partitioning for Sparse-Sparse Matrix Multiplication on GPUs","display_name":"Multiway Merge Partitioning for Sparse-Sparse Matrix Multiplication on GPUs","publication_year":2025,"publication_date":"2025-11-03","ids":{"openalex":"https://openalex.org/W4417403262","doi":"https://doi.org/10.1109/pact65351.2025.00025"},"language":null,"primary_location":{"id":"doi:10.1109/pact65351.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034423097","display_name":"Eric Lorimer","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eric Lorimer","raw_affiliation_strings":["Georgia Institute of Technology,United States of America"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,United States of America","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045920192","display_name":"Ruobing Han","orcid":"https://orcid.org/0000-0002-3090-3951"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruobing Han","raw_affiliation_strings":["Georgia Institute of Technology,United States of America"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,United States of America","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077042700","display_name":"Sung Ha Kang","orcid":"https://orcid.org/0000-0002-0312-6595"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sung Ha Kang","raw_affiliation_strings":["Georgia Institute of Technology,United States of America"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,United States of America","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000822269","display_name":"Hyesoon Kim","orcid":"https://orcid.org/0000-0002-6061-7825"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyesoon Kim","raw_affiliation_strings":["Georgia Institute of Technology,United States of America"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,United States of America","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034423097"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4670137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"160","last_page":"171"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6136999726295471,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6136999726295471,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.15760000050067902,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.0471000000834465,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.6165000200271606},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5802000164985657},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5649999976158142},{"id":"https://openalex.org/keywords/disjoint-sets","display_name":"Disjoint sets","score":0.5533999800682068},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5203999876976013},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.4887000024318695},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.4196000099182129},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.41519999504089355},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4059999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7627999782562256},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.621399998664856},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.6165000200271606},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5802000164985657},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5649999976158142},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.5533999800682068},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5203999876976013},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.4887000024318695},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4374000132083893},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.4196000099182129},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.41519999504089355},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C35555965","wikidata":"https://www.wikidata.org/wiki/Q189057","display_name":"Merge sort","level":4,"score":0.4025000035762787},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.38929998874664307},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33719998598098755},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C104140500","wikidata":"https://www.wikidata.org/wiki/Q2088159","display_name":"Row and column spaces","level":3,"score":0.32519999146461487},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C85817219","wikidata":"https://www.wikidata.org/wiki/Q884772","display_name":"Block matrix","level":3,"score":0.2847000062465668},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.2768999934196472},{"id":"https://openalex.org/C200106649","wikidata":"https://www.wikidata.org/wiki/Q223683","display_name":"Transpose","level":3,"score":0.27239999175071716},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C94633896","wikidata":"https://www.wikidata.org/wiki/Q7140378","display_name":"Partial order reduction","level":3,"score":0.26510000228881836},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact65351.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact65351.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th International Conference on Parallel Architectures and Compilation Techniques (PACT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1528478977","https://openalex.org/W1548198155","https://openalex.org/W1973918431","https://openalex.org/W1980282429","https://openalex.org/W1985263109","https://openalex.org/W2048234997","https://openalex.org/W2059966434","https://openalex.org/W2066409766","https://openalex.org/W2089437293","https://openalex.org/W2155921933","https://openalex.org/W2168931017","https://openalex.org/W2298920323","https://openalex.org/W2810532458","https://openalex.org/W2914086833","https://openalex.org/W3000305333","https://openalex.org/W3091170309","https://openalex.org/W4220912491","https://openalex.org/W4309620166"],"related_works":[],"abstract_inverted_index":{"Sparse-sparse":[0],"matrix":[1,290],"multiplication":[2],"(SpGEMM)":[3],"is":[4,23,59,71,80,223],"a":[5,189,224,278],"well-studied":[6],"problem":[7],"on":[8,74,123,277,288],"CPUs,":[9],"GPUs,":[10,75],"accelerators":[11],"(e.g.":[12],"FPGAs),":[13],"and":[14,42,97,162,219,260,273,285,295,302,304],"distributed":[15],"systems.":[16],"The":[17],"main":[18],"computational":[19],"bottleneck":[20],"in":[21,61,114,159,198],"SpGEMM":[22,121,248,314],"the":[24,38,51,77,87,107,115,128,146,199,207,309],"reduction":[25,58],"process,":[26],"which":[27,110,125],"involves":[28],"matching":[29],"indices":[30],"to":[31,37,105,139,152,243,253,266,307],"accumulate":[32],"partial":[33,102,129],"products":[34,103,130],"that":[35,56,135,221],"map":[36],"same":[39],"output":[40,108,141],"locations":[41],"requires":[43],"irregular":[44],"memory":[45,52,65,79],"accesses.":[46],"Efficient":[47],"implementations":[48,255],"must":[49],"use":[50],"hierarchy":[53],"effectively":[54],"so":[55],"this":[57,144,179,203,217,222,236],"done":[60],"fast":[62],"local":[63,78,116,154,160],"(cache)":[64],"as":[66,68,83],"much":[67,286],"possible.":[69],"This":[70],"challenging,":[72],"especially":[73],"where":[76],"managed":[81],"explicitly,":[82],"different":[84,92,99,293],"rows":[85],"of":[86,94,101,188,264,281],"result":[88],"may":[89,111],"have":[90],"vastly":[91,98],"numbers":[93,100],"nonzero":[95],"elements":[96],"required":[104],"produce":[106],"row":[109],"not":[112],"fit":[113],"memory.":[117],"We":[118,176,205,245],"demonstrate":[119],"an":[120],"implementation":[122],"GPUs":[124],"perfectly":[126],"partitions":[127],"into":[131,311],"equal-size":[132],"blocks":[133],"such":[134],"each":[136,163],"block":[137,147,164],"maps":[138],"disjoint":[140],"locations.":[142],"In":[143],"way,":[145],"size":[148],"can":[149,165,181],"be":[150,166,182],"chosen":[151],"maximize":[153],"memory,":[155,161],"all":[156,212],"reductions":[157],"happen":[158],"processed":[167],"independently":[168],"without":[169],"communication":[170],"or":[171],"data":[172],"from":[173,211],"other":[174],"blocks.":[175],"show":[177,220],"how":[178],"partitioning":[180,192,237],"achieved":[183],"by":[184],"solving":[185,202],"many":[186],"instances":[187],"multiway":[190],"merge":[191],"problem.":[193,204],"There":[194],"are":[195],"several":[196],"algorithms":[197,215],"literature":[200],"for":[201,216,227,231],"present":[206],"mathematical":[208],"formulation,":[209],"missing":[210],"papers":[213],"providing":[214],"problem,":[218],"useful":[225],"framework":[226],"parallelizing":[228],"it":[229],"efficiently":[230],"GPUs.":[232],"To":[233],"our":[234,247],"knowledge,":[235],"scheme":[238],"has":[239],"never":[240],"been":[241],"applied":[242],"SpGEMM.":[244],"evaluate":[246],"implementation,":[249],"MMSpGEMM,":[250],"with":[251,292],"respect":[252],"state-of-the-art":[254],"including":[256],"cuSPARSE,":[257],"TileSpGEMM,":[258],"spECK,":[259],"AC-SpGEMM,":[261],"achieving":[262],"speedups":[263],"up":[265],"$5.3":[267],"x,":[268,270],"10.0":[269],"1.3":[271],"x$,":[272,275],"$1.1":[274],"respectively,":[276],"select":[279],"set":[280],"20":[282],"SuiteSparse":[283],"matrices,":[284],"higher":[287],"synthetic":[289],"multiplications":[291],"left":[294],"right":[296],"matrices.":[297],"Finally,":[298],"we":[299],"discuss":[300],"improvements":[301],"limitations":[303],"suggest":[305],"ways":[306],"incorporate":[308],"idea":[310],"more":[312],"general":[313],"routines.":[315]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-16T00:00:00"}
