{"id":"https://openalex.org/W2968028996","doi":"https://doi.org/10.1145/3341069.3341072","title":"Adaptive Sparse Matrix-Vector Multiplication on CPU-GPU Heterogeneous Architecture","display_name":"Adaptive Sparse Matrix-Vector Multiplication on CPU-GPU Heterogeneous Architecture","publication_year":2019,"publication_date":"2019-06-22","ids":{"openalex":"https://openalex.org/W2968028996","doi":"https://doi.org/10.1145/3341069.3341072","mag":"2968028996"},"language":"en","primary_location":{"id":"doi:10.1145/3341069.3341072","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3341069.3341072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 3rd High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077101275","display_name":"Nie Jing","orcid":null},"institutions":[{"id":"https://openalex.org/I2800710378","display_name":"Naval University of Engineering","ror":"https://ror.org/056vyez31","country_code":"CN","type":"education","lineage":["https://openalex.org/I2800710378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Nie","raw_affiliation_strings":["Naval University of Engineering, Wuhan, Hubei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Naval University of Engineering, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I2800710378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005304261","display_name":"Chunlei Zhang","orcid":"https://orcid.org/0000-0002-3851-2357"},"institutions":[{"id":"https://openalex.org/I2800710378","display_name":"Naval University of Engineering","ror":"https://ror.org/056vyez31","country_code":"CN","type":"education","lineage":["https://openalex.org/I2800710378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunlei Zhang","raw_affiliation_strings":["Naval University of Engineering, Wuhan, Hubei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Naval University of Engineering, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I2800710378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063040852","display_name":"Dan Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Zou","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676779","display_name":"Fei Xia","orcid":"https://orcid.org/0000-0001-6209-4477"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Xia","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771923","display_name":"Lina Lu","orcid":"https://orcid.org/0000-0003-1775-1831"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lina Lu","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100388976","display_name":"Xiang Wang","orcid":"https://orcid.org/0000-0001-5538-4310"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Wang","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329787","display_name":"Fei Zhao","orcid":"https://orcid.org/0009-0007-0499-0143"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Zhao","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7438,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74679388,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"6","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8553977012634277},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7566227912902832},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5683832764625549},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.5676999688148499},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.554668128490448},{"id":"https://openalex.org/keywords/cpu-shielding","display_name":"CPU shielding","score":0.5233192443847656},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.508543848991394},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5084089636802673},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.4922107756137848},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.4804393947124481},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.4590526819229126},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.446085125207901},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4352869987487793},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.12189996242523193},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0703410804271698}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8553977012634277},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7566227912902832},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5683832764625549},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.5676999688148499},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.554668128490448},{"id":"https://openalex.org/C180613757","wikidata":"https://www.wikidata.org/wiki/Q5013757","display_name":"CPU shielding","level":3,"score":0.5233192443847656},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.508543848991394},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5084089636802673},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.4922107756137848},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.4804393947124481},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.4590526819229126},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.446085125207901},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4352869987487793},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.12189996242523193},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0703410804271698},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3341069.3341072","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3341069.3341072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 3rd High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.4399999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W164384110","https://openalex.org/W1510543252","https://openalex.org/W1555620806","https://openalex.org/W1653630692","https://openalex.org/W1988888548","https://openalex.org/W1990832096","https://openalex.org/W2035080386","https://openalex.org/W2045920776","https://openalex.org/W2099625934","https://openalex.org/W2114977680","https://openalex.org/W2119547137","https://openalex.org/W2124007994","https://openalex.org/W2128131218","https://openalex.org/W2128853364","https://openalex.org/W2130289795","https://openalex.org/W2142356482","https://openalex.org/W2154118576","https://openalex.org/W2167868137","https://openalex.org/W2499905000","https://openalex.org/W2626696598","https://openalex.org/W2997945685","https://openalex.org/W4285719527","https://openalex.org/W6636930873"],"related_works":["https://openalex.org/W2387982802","https://openalex.org/W1896942098","https://openalex.org/W1991061790","https://openalex.org/W2088078730","https://openalex.org/W2400763249","https://openalex.org/W1482063109","https://openalex.org/W2350289853","https://openalex.org/W2391126280","https://openalex.org/W2330590072","https://openalex.org/W2001170981"],"abstract_inverted_index":{"SpMV":[0,58,151],"is":[1,12,23,135],"the":[2,7,24,39,67,71,85,89,97,124,140,168],"core":[3],"algorithm":[4,170],"in":[5,15,28,42],"solving":[6],"sparse":[8,104,164,182],"linear":[9],"equations,":[10],"which":[11,62],"widely":[13],"used":[14],"many":[16],"research":[17],"and":[18,32,76,79,99,129,146,157,176],"engineering":[19],"application":[20],"field.":[21],"GPU":[22,158],"most":[25],"common":[26],"coprocessor":[27],"high-performance":[29],"computing":[30,68],"domain,":[31],"has":[33,51],"already":[34],"been":[35,52],"proven":[36],"to":[37,55,84,122,138,178],"researchers":[38],"practical":[40],"value":[41],"accelerating":[43],"various":[44],"algorithms.":[45],"A":[46,131],"lot":[47],"of":[48,88,101,126,181],"reletead":[49],"work":[50],"carried":[53],"out":[54],"optimize":[56],"parallel":[57],"on":[59,65,70,108,162],"CPU-GPU":[60,109,120],"platforms,":[61],"mainly":[63],"focuses":[64],"reducing":[66],"overhead":[69,143],"GPU,":[72],"including":[73],"branch":[74],"divergence":[75],"cache":[77],"missing,":[78],"little":[80],"attention":[81],"was":[82],"paid":[83],"overall":[86],"efficiency":[87],"heterogeneous":[90,110],"platform.":[91],"In":[92],"this":[93],"paper,":[94],"we":[95],"describe":[96],"design":[98],"implementation":[100],"an":[102],"adaptive":[103],"matrix-vector":[105],"multiplication":[106],"(SpMV)":[107],"architecture.":[111],"We":[112],"propose":[113],"a":[114],"dynamic":[115],"task":[116],"scheduling":[117],"framework":[118],"for":[119,155],"platform":[121],"improve":[123],"utilization":[125],"both":[127,172],"CPU":[128,145,156],"GPU.":[130,147],"double":[132],"buffering":[133],"scheme":[134],"also":[136],"presented":[137],"hide":[139],"data":[141],"transfer":[142],"between":[144],"Two":[148],"deeply":[149],"optimized":[150],"kernels":[152],"are":[153],"deployed":[154],"respectively.":[159],"The":[160],"evaluation":[161],"typical":[163],"matrices":[165],"indicates":[166],"that":[167],"proposed":[169],"obtains":[171],"significant":[173],"performance":[174],"increase":[175],"adaptability":[177],"different":[179],"types":[180],"matrices.":[183]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
