{"id":"https://openalex.org/W2594491068","doi":"https://doi.org/10.1109/tcad.2017.2681072","title":"Scale-Free Sparse Matrix-Vector Multiplication on Many-Core Architectures","display_name":"Scale-Free Sparse Matrix-Vector Multiplication on Many-Core Architectures","publication_year":2017,"publication_date":"2017-03-10","ids":{"openalex":"https://openalex.org/W2594491068","doi":"https://doi.org/10.1109/tcad.2017.2681072","mag":"2594491068"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2017.2681072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2017.2681072","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Center for Energy-Efficient Computing and Applications, School of Electronics Engineering and Computer Science, Peking University, Beijing, China","Collaborative Innovation Center of High Performance Computing, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-9076-7998","affiliations":[{"raw_affiliation_string":"Center for Energy-Efficient Computing and Applications, School of Electronics Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Collaborative Innovation Center of High Performance Computing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103180941","display_name":"Wai Teng Tang","orcid":"https://orcid.org/0000-0002-6553-1270"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wai Teng Tang","raw_affiliation_strings":["A*STAR Institute of High Performance Computing, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"A*STAR Institute of High Performance Computing, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103214318","display_name":"Rongxuan Zhao","orcid":"https://orcid.org/0009-0001-9705-3701"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruizhe Zhao","raw_affiliation_strings":["Center for Energy-Efficient Computing and Applications, School of EECS, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Energy-Efficient Computing and Applications, School of EECS, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103114808","display_name":"Mian Lu","orcid":"https://orcid.org/0009-0002-4853-8881"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Mian Lu","raw_affiliation_strings":["A*STAR Institute of High Performance Computing, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"A*STAR Institute of High Performance Computing, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077959628","display_name":"Huynh Phung Huynh","orcid":null},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Huynh Phung Huynh","raw_affiliation_strings":["A*STAR Institute of High Performance Computing, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"A*STAR Institute of High Performance Computing, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001599295","display_name":"Rick Siow Mong Goh","orcid":"https://orcid.org/0000-0001-9116-1595"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Rick Siow Mong Goh","raw_affiliation_strings":["A*STAR Institute of High Performance Computing, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"A*STAR Institute of High Performance Computing, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100604860"],"corresponding_institution_ids":["https://openalex.org/I170215575","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.6937,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.67530523,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"36","issue":"12","first_page":"2106","last_page":"2119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8855962753295898},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.805892825126648},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7284414768218994},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6715786457061768},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.532470703125},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.5233016610145569},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.4919714629650116},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.45589473843574524},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4327276647090912},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.41655418276786804}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8855962753295898},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.805892825126648},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7284414768218994},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6715786457061768},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.532470703125},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.5233016610145569},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.4919714629650116},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.45589473843574524},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4327276647090912},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.41655418276786804},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2017.2681072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2017.2681072","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320698","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335561","display_name":"Institute of Computing Technology, Chinese Academy of Sciences","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W170802544","https://openalex.org/W348433680","https://openalex.org/W1482680420","https://openalex.org/W1506342804","https://openalex.org/W1588915715","https://openalex.org/W1653630692","https://openalex.org/W1776498962","https://openalex.org/W1854214752","https://openalex.org/W1965551736","https://openalex.org/W1972077953","https://openalex.org/W1981902599","https://openalex.org/W1982020565","https://openalex.org/W1982996921","https://openalex.org/W1983235612","https://openalex.org/W1987840949","https://openalex.org/W1990832096","https://openalex.org/W2008620264","https://openalex.org/W2009654791","https://openalex.org/W2023930909","https://openalex.org/W2027806965","https://openalex.org/W2035080386","https://openalex.org/W2048441570","https://openalex.org/W2052289288","https://openalex.org/W2084309410","https://openalex.org/W2084446096","https://openalex.org/W2087507944","https://openalex.org/W2088866486","https://openalex.org/W2090584832","https://openalex.org/W2096661534","https://openalex.org/W2101511474","https://openalex.org/W2113282196","https://openalex.org/W2128539477","https://openalex.org/W2128853364","https://openalex.org/W2129232868","https://openalex.org/W2152517358","https://openalex.org/W2162283062","https://openalex.org/W2167334577","https://openalex.org/W2182219686","https://openalex.org/W2232645663","https://openalex.org/W2263374743","https://openalex.org/W2344044374","https://openalex.org/W2518567779","https://openalex.org/W3137551601","https://openalex.org/W4237024478","https://openalex.org/W4243205343","https://openalex.org/W4247712932","https://openalex.org/W6636930873","https://openalex.org/W6639055396"],"related_works":["https://openalex.org/W2213533160","https://openalex.org/W2467043670","https://openalex.org/W4252450863","https://openalex.org/W2085105049","https://openalex.org/W3203561460","https://openalex.org/W3009624197","https://openalex.org/W4251138667","https://openalex.org/W2682544458","https://openalex.org/W1969709731","https://openalex.org/W2022666014"],"abstract_inverted_index":{"Sparse":[0],"matrix-vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4,86],"one":[5],"of":[6,21,75,103,140,213],"the":[7,19,69,79,83,101,116,126,141,148,157,164,180,185,198,221,226,238],"most":[8],"important":[9],"kernels":[10],"for":[11,23,44,132],"many":[12],"applications.":[13],"In":[14,172],"this":[15],"paper,":[16],"we":[17,150,177],"study":[18],"implementation":[20,43,52,85,128,155,229,241],"SpMV":[22,56],"scale-free":[24,215],"matrices":[25,216],"on":[26,189,233,245],"many-core":[27,46],"architectures":[28],"including":[29],"graphic":[30],"processing":[31],"units":[32,188],"and":[33,77,119,196,205],"Xeon":[34,165,190,246],"Phi":[35,166,191],"coprocessors.":[36],"We":[37,96,162],"first":[38],"propose":[39,151],"a":[40,54,72,152,169,210],"hardware":[41,105,153,174,223,227,239],"oblivious":[42,106,224],"heterogeneous":[45,133],"processors":[47],"using":[48,156,209],"OpenCL.":[49],"Our":[50],"OpenCL":[51,84,127],"uses":[53],"novel":[55],"format":[57,182],"called":[58],"hybrid":[59],"COO+CSR":[60],"(HCC),":[61],"which":[62,91],"employs":[63],"2-D":[64],"jagged":[65],"partitioning":[66],"to":[67,88,99,115,137],"balance":[68],"workload":[70],"among":[71],"large":[73],"number":[74],"cores":[76],"improve":[78,147,197],"data":[80],"locality.":[81],"Moreover,":[82],"designed":[87],"be":[89],"parametric,":[90],"allows":[92],"systematic":[93],"performance":[94,114,200],"tuning.":[95],"conduct":[97],"experiments":[98],"evaluate":[100],"efficiency":[102],"our":[104,173],"implementation.":[107,124],"Experiments":[108,208],"show":[109],"that":[110,179,218],"it":[111,135],"achieves":[112,230,242],"comparable":[113],"Intel":[117],"MKL":[118],"state-of-the-art":[120],"OpenCL-based":[121,222],"ViennaCL":[122],"library":[123],"Although":[125],"provides":[129],"functional":[130],"portability":[131],"systems,":[134],"fails":[136],"take":[138],"advantage":[139],"low-level":[142,194],"architectural":[143],"features.":[144],"To":[145],"further":[146],"performance,":[149],"conscious":[154,175,228,240],"native":[158],"parallel":[159],"programming":[160],"language.":[161],"use":[163],"platform":[167],"as":[168],"case":[170],"study.":[171],"implementation,":[176,225],"ensure":[178],"HCC":[181],"efficiently":[183],"utilizes":[184],"vector":[186],"process":[187],"by":[192],"employing":[193],"intrinsics,":[195],"overall":[199],"through":[201],"locality-aware":[202],"block":[203],"mapping,":[204],"intrablock":[206],"tiling.":[207],"wide":[211],"range":[212],"representative":[214],"demonstrate":[217],"compared":[219],"with":[220,236],"2.2\u00d7":[231],"speedup":[232,244],"average.":[234],"Compared":[235],"MKL,":[237],"3.1\u00d7":[243],"Phi.":[247]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
