{"id":"https://openalex.org/W4392894994","doi":"https://doi.org/10.1145/3653020","title":"Cerberus: Triple Mode Acceleration of Sparse Matrix and Vector Multiplication","display_name":"Cerberus: Triple Mode Acceleration of Sparse Matrix and Vector Multiplication","publication_year":2024,"publication_date":"2024-03-17","ids":{"openalex":"https://openalex.org/W4392894994","doi":"https://doi.org/10.1145/3653020"},"language":"en","primary_location":{"id":"doi:10.1145/3653020","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653020","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653020","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3653020","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013858915","display_name":"Soojin Hwang","orcid":"https://orcid.org/0009-0005-0886-6794"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Soojin Hwang","raw_affiliation_strings":["KAIST, Yuseong-gu, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0005-0886-6794","affiliations":[{"raw_affiliation_string":"KAIST, Yuseong-gu, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028282859","display_name":"Daehyeon Baek","orcid":"https://orcid.org/0009-0008-0460-3809"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Daehyeon Baek","raw_affiliation_strings":["KAIST, Yuseong-gu Republic of Korea","KAIST, Yuseong-gu, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0008-0460-3809","affiliations":[{"raw_affiliation_string":"KAIST, Yuseong-gu Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"KAIST, Yuseong-gu, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037553165","display_name":"Jongse Park","orcid":"https://orcid.org/0000-0002-6629-449X"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongse Park","raw_affiliation_strings":["KAIST, Yuseong-gu Republic of Korea","KAIST, Yuseong-gu, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-6629-449X","affiliations":[{"raw_affiliation_string":"KAIST, Yuseong-gu Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"KAIST, Yuseong-gu, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047149607","display_name":"Jaehyuk Huh","orcid":"https://orcid.org/0000-0002-1742-047X"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaehyuk Huh","raw_affiliation_strings":["KAIST, Yuseong-gu Republic of Korea","KAIST, Yuseong-gu, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-1742-047X","affiliations":[{"raw_affiliation_string":"KAIST, Yuseong-gu Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"KAIST, Yuseong-gu, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9614,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.75877591,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"21","issue":"2","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9714000225067139,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9714000225067139,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9685999751091003,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9635999798774719,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7819406986236572},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6789610981941223},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.6506766676902771},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5197839140892029},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5184523463249207},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.47360873222351074},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4662114083766937},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.4627179503440857},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.42282742261886597},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.38008439540863037},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.23314768075942993},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2142808735370636},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.1161094605922699},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.09709417819976807},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07931894063949585}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7819406986236572},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6789610981941223},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.6506766676902771},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5197839140892029},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5184523463249207},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.47360873222351074},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4662114083766937},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.4627179503440857},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42282742261886597},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.38008439540863037},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.23314768075942993},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2142808735370636},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1161094605922699},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.09709417819976807},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07931894063949585},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3653020","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653020","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653020","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3653020","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653020","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653020","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G12967831","display_name":null,"funder_award_id":"IITP2017-0-00466","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392894994.pdf","grobid_xml":"https://content.openalex.org/works/W4392894994.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W424435301","https://openalex.org/W1583837637","https://openalex.org/W1990832096","https://openalex.org/W2035080386","https://openalex.org/W2063927241","https://openalex.org/W2091883426","https://openalex.org/W2093053744","https://openalex.org/W2115052535","https://openalex.org/W2119144962","https://openalex.org/W2278138779","https://openalex.org/W2285660444","https://openalex.org/W2414288268","https://openalex.org/W2559809597","https://openalex.org/W2625457103","https://openalex.org/W2808128431","https://openalex.org/W2883929540","https://openalex.org/W2963594949","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2979747168","https://openalex.org/W3005077654","https://openalex.org/W3007566156","https://openalex.org/W3094263445","https://openalex.org/W3103168911","https://openalex.org/W3110597019","https://openalex.org/W3115280429","https://openalex.org/W3125710003","https://openalex.org/W4234592442","https://openalex.org/W4247176905","https://openalex.org/W4281287226","https://openalex.org/W4308083513","https://openalex.org/W4308083753","https://openalex.org/W4312650633","https://openalex.org/W6906894776"],"related_works":["https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W752783541","https://openalex.org/W1506547947","https://openalex.org/W2995605830","https://openalex.org/W4206811032","https://openalex.org/W2086123442","https://openalex.org/W2596457687","https://openalex.org/W3212757063","https://openalex.org/W2093666864"],"abstract_inverted_index":{"The":[0,29],"multiplication":[1,55],"of":[2,10,32,83,112],"sparse":[3,26,64,68,144,175,213],"matrix":[4,40,74,160,197,214],"and":[5,39,49,56,69,75,103,120,146,218,235],"vector":[6],"(SpMV)":[7],"is":[8,88,95],"one":[9,82,98],"the":[11,73,78,84,96,110,118,132,138,159,164,181,207],"most":[12],"widely":[13,46],"used":[14,47],"kernels":[15],"in":[16,178],"high-performance":[17],"computing":[18],"as":[19,21],"well":[20],"machine":[22],"learning":[23],"acceleration":[24],"for":[25,72,210],"neural":[27],"networks.":[28],"design":[30,94,113,129],"space":[31],"SpMV":[33,242],"accelerators":[34,80],"has":[35],"two":[36,45,63,139,174],"axes:":[37],"algorithm":[38,119],"representation.":[41,122],"There":[42],"have":[43,152],"been":[44],"algorithms":[48],"data":[50,65,121],"representations.":[51],"Two":[52],"algorithms,":[53],"scalar":[54],"dot":[57,150],"product,":[58],"can":[59],"be":[60,91],"combined":[61],"with":[62,115,149,155,215],"representations,":[66],"compressed":[67,143],"bitmap":[70,147],"formats":[71],"vector.":[76],"Although":[77],"prior":[79],"adopted":[81],"possible":[85],"designs,":[86],"it":[87,190],"yet":[89],"to":[90,117,180],"investigated":[92],"which":[93,204],"best":[97,140,208,241],"across":[99,134],"different":[100,135],"hardware":[101,202],"resources":[102],"workload":[104],"characteristics.":[105,161],"This":[106],"paper":[107],"first":[108],"investigates":[109],"impact":[111],"choices":[114],"respect":[116],"Our":[123,221],"evaluation":[124],"shows":[125],"that":[126,225],"no":[127],"single":[128],"always":[130],"outperforms":[131],"others":[133],"workloads,":[136],"but":[137],"designs":[141],"(i.e.,":[142],"format":[145,148],"product)":[151],"complementary":[153],"performance":[154,229],"trade-offs":[156],"incurred":[157],"by":[158],"Based":[162],"on":[163,196],"analysis,":[165],"this":[166],"study":[167],"proposes":[168,191],"Cerberus,":[169],"a":[170,192,200,211,232,239],"triple-mode":[171],"accelerator":[172],"supporting":[173],"operation":[176],"modes":[177],"addition":[179],"base":[182],"dense":[183],"mode.":[184],"To":[185],"allow":[186],"such":[187],"multi-mode":[188],"operation,":[189],"prediction":[193],"model":[194],"based":[195],"characteristics":[198],"under":[199],"given":[201,212],"configuration,":[203],"statically":[205],"selects":[206],"mode":[209],"its":[216],"dimension":[217],"density":[219],"information.":[220],"experimental":[222],"results":[223],"show":[224],"Cerberus":[226],"provides":[227],"12.1\u00d7":[228],"improvements":[230,237],"from":[231,238],"dense-only":[233],"accelerator,":[234],"1.5\u00d7":[236],"fixed":[240],"design.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
