{"id":"https://openalex.org/W4404112662","doi":"https://doi.org/10.1145/3703352","title":"ApSpGEMM: Accelerating Large-scale SpGEMM with Heterogeneous Collaboration and Adaptive Panel","display_name":"ApSpGEMM: Accelerating Large-scale SpGEMM with Heterogeneous Collaboration and Adaptive Panel","publication_year":2024,"publication_date":"2024-11-06","ids":{"openalex":"https://openalex.org/W4404112662","doi":"https://doi.org/10.1145/3703352"},"language":"en","primary_location":{"id":"doi:10.1145/3703352","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3703352","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3703352","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018644308","display_name":"Dezhong Yao","orcid":"https://orcid.org/0000-0003-0336-0522"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhong Yao","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-0336-0522","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115603213","display_name":"Sifan Zhao","orcid":"https://orcid.org/0009-0000-7735-3187"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sifan Zhao","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0000-7735-3187","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038161234","display_name":"Tongtong Liu","orcid":"https://orcid.org/0009-0003-1651-7784"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tongtong Liu","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0003-1651-7784","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101846482","display_name":"Gang Wu","orcid":"https://orcid.org/0000-0002-6615-0699"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Wu","raw_affiliation_strings":["National Super Computing Center in Zhengzhou, Zhengzhou University, Zhengzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6615-0699","affiliations":[{"raw_affiliation_string":"National Super Computing Center in Zhengzhou, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["School of computer science and technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-3934-7605","affiliations":[{"raw_affiliation_string":"School of computer science and technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3122,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61163426,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"22","issue":"1","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7687008380889893},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5712974071502686},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3217473328113556}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7687008380889893},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5712974071502686},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3217473328113556},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3703352","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3703352","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3703352","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3703352","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6049162292","display_name":"\u9762\u5411\u8fb9\u7f18\u73af\u5883\u7684\u6df1\u5ea6\u5b66\u4e60\u7cfb\u7edf\u5173\u952e\u6280\u672f\u7814\u7a76","funder_award_id":"62072204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1973918431","https://openalex.org/W1985263109","https://openalex.org/W1986844835","https://openalex.org/W2028303621","https://openalex.org/W2089437293","https://openalex.org/W2095249664","https://openalex.org/W2168931017","https://openalex.org/W2298920323","https://openalex.org/W2781614977","https://openalex.org/W2811247723","https://openalex.org/W2970435804","https://openalex.org/W2985041356","https://openalex.org/W3044796228","https://openalex.org/W3091817928","https://openalex.org/W3109984686","https://openalex.org/W3198975860","https://openalex.org/W4226418754","https://openalex.org/W4296709444","https://openalex.org/W4302010773","https://openalex.org/W4302216254","https://openalex.org/W4309195324","https://openalex.org/W4312191554","https://openalex.org/W4312676533","https://openalex.org/W4315783737","https://openalex.org/W4320729330","https://openalex.org/W4321608012","https://openalex.org/W4360831848","https://openalex.org/W4380356442","https://openalex.org/W4383860503"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"Sparse":[1],"General":[2],"Matrix-Matrix":[3],"multiplication":[4,34,104,186],"(SpGEMM)":[5],"is":[6,46,112,187],"a":[7],"fundamental":[8],"component":[9],"for":[10,99],"many":[11],"applications,":[12],"such":[13],"as":[14],"algebraic":[15],"multigrid":[16],"methods":[17],"(AMG),":[18],"graphic":[19],"processing,":[20],"and":[21,66,79,117,120,134,155],"deep":[22],"learning.":[23],"However,":[24],"the":[25,38,125,180],"unbearable":[26],"latency":[27,74,89],"of":[28,40,127,182],"computing":[29,73],"high-dimensional,":[30],"large-scale":[31,101,183],"sparse":[32,102,175,184],"matrix":[33,103,185],"on":[35,105,114,132,171],"GPUs":[36],"hinders":[37],"development":[39],"these":[41],"applications.":[42],"An":[43],"effective":[44],"approach":[45,166],"heterogeneous":[47,107,178],"cores":[48,86,145],"collaborative":[49,100],"computing,":[50],"but":[51],"this":[52,92],"method":[53],"must":[54],"address":[55],"three":[56],"aspects:":[57],"(1)":[58],"irregular":[59,67],"non-zero":[60,128],"elements":[61],"lead":[62],"to":[63,123,191],"load":[64,133],"imbalance":[65],"memory":[68,135],"access,":[69],"(2)":[70],"different":[71,85,174],"core":[72],"differences":[75],"reduce":[76],"computational":[77,147],"parallelism,":[78],"(3)":[80],"temporary":[81],"data":[82,153],"transfer":[83],"between":[84],"introduces":[87],"additional":[88],"overhead.":[90,159],"In":[91],"work,":[93],"we":[94],"propose":[95],"an":[96],"innovative":[97],"framework":[98],"CPU-GPU":[106],"cores,":[108,179],"named":[109],"ApSpGEMM.":[110],"ApSpGEMM":[111],"based":[113],"sparsity":[115],"rules":[116],"proposes":[118],"reordering":[119],"splitting":[121],"algorithms":[122],"eliminate":[124],"impact":[126],"element":[129],"distribution":[130],"features":[131],"access.":[136],"Then":[137],"adaptive":[138],"panels":[139],"allocation":[140],"with":[141,161,173],"affinity":[142],"constraints":[143],"among":[144],"improves":[146],"parallelism.":[148],"Finally,":[149],"carefully":[150],"arranged":[151],"asynchronous":[152],"transmission":[154],"computation":[156],"balance":[157],"communication":[158],"Compared":[160],"state-of-the-art":[162],"SpGEMM":[163],"methods,":[164],"our":[165],"provides":[167],"excellent":[168],"absolute":[169],"performance":[170],"matrices":[172],"structures.":[176],"On":[177],"GFlops":[181],"improved":[188],"by":[189],"2.25":[190],"7.21":[192],"times.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
