{"id":"https://openalex.org/W2535926538","doi":"https://doi.org/10.1145/2994148","title":"A Cross-Platform SpMV Framework on Many-Core Architectures","display_name":"A Cross-Platform SpMV Framework on Many-Core Architectures","publication_year":2016,"publication_date":"2016-10-25","ids":{"openalex":"https://openalex.org/W2535926538","doi":"https://doi.org/10.1145/2994148","mag":"2535926538"},"language":"en","primary_location":{"id":"doi:10.1145/2994148","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2994148","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2994148&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2994148&type=pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001666028","display_name":"Yunquan Zhang","orcid":"https://orcid.org/0000-0002-2618-5088"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunquan Zhang","raw_affiliation_strings":["State Key Laboratory of Computer Architecture, Institute of Computing Technologies, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Architecture, Institute of Computing Technologies, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088172355","display_name":"Shigang Li","orcid":"https://orcid.org/0000-0003-0022-7865"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shigang Li","raw_affiliation_strings":["State Key Laboratory of Computer Architecture, Institute of Computing Technologies, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Architecture, Institute of Computing Technologies, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049910854","display_name":"Shengen Yan","orcid":"https://orcid.org/0009-0005-3858-7972"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengen Yan","raw_affiliation_strings":["SenseTime Group Limited, Department of Information Engineering, Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"SenseTime Group Limited, Department of Information Engineering, Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072755725","display_name":"Huiyang Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huiyang Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, North Carolina State University, Raleigh, NC"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, North Carolina State University, Raleigh, NC","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001666028"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":2.2398,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87720735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"13","issue":"4","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9200449585914612},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7199265956878662},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.6046127676963806},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5374119877815247},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.4973504841327667},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.44473668932914734},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.44136518239974976},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.4279515743255615},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4146854281425476},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.41367214918136597},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.32604047656059265},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.1320423185825348}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9200449585914612},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7199265956878662},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.6046127676963806},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5374119877815247},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.4973504841327667},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.44473668932914734},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.44136518239974976},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.4279515743255615},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4146854281425476},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.41367214918136597},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32604047656059265},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.1320423185825348},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2994148","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2994148","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2994148&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2994148","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2994148","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2994148&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1023919524","display_name":null,"funder_award_id":", Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1756773353","display_name":null,"funder_award_id":"61521","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1942418271","display_name":null,"funder_award_id":"61502450, 61432018, 61521092 and 61272136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2170153215","display_name":null,"funder_award_id":"2016YFB0200803","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G2849673663","display_name":null,"funder_award_id":"61432018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3435950671","display_name":null,"funder_award_id":"and Gr","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G384178317","display_name":null,"funder_award_id":"02008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4045548416","display_name":null,"funder_award_id":"61521092","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4276857174","display_name":null,"funder_award_id":"1521092","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5249178904","display_name":null,"funder_award_id":"Grant No. 6","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6244656024","display_name":null,"funder_award_id":"61502450","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6717345170","display_name":"Washington Internships for Students of Engineering (WISE)","funder_award_id":"0200803","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7377512700","display_name":null,"funder_award_id":"61272136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7795505501","display_name":"Collaborative Project: RISE - Research-based Interdisciplinary STEM Education","funder_award_id":"1432018","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8114646031","display_name":null,"funder_award_id":"2016Y","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8380396287","display_name":null,"funder_award_id":"61521092","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2535926538.pdf","grobid_xml":"https://content.openalex.org/works/W2535926538.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W839792104","https://openalex.org/W1508286210","https://openalex.org/W1537323515","https://openalex.org/W1588915715","https://openalex.org/W1964704819","https://openalex.org/W1984222112","https://openalex.org/W1985938427","https://openalex.org/W1987840949","https://openalex.org/W1995649289","https://openalex.org/W2009654791","https://openalex.org/W2019708326","https://openalex.org/W2023930909","https://openalex.org/W2035080386","https://openalex.org/W2043105874","https://openalex.org/W2087977509","https://openalex.org/W2088866486","https://openalex.org/W2101511474","https://openalex.org/W2103877122","https://openalex.org/W2111667319","https://openalex.org/W2119547137","https://openalex.org/W2128539477","https://openalex.org/W2128853364","https://openalex.org/W2151285624","https://openalex.org/W2154118576","https://openalex.org/W2263374743","https://openalex.org/W2407908567","https://openalex.org/W2518567779","https://openalex.org/W2997945685","https://openalex.org/W4243261006","https://openalex.org/W4248149126","https://openalex.org/W4285719527","https://openalex.org/W6678107477"],"related_works":["https://openalex.org/W3150370983","https://openalex.org/W2239119680","https://openalex.org/W3022016791","https://openalex.org/W2963207152","https://openalex.org/W1638830944","https://openalex.org/W1564887326","https://openalex.org/W2185760795","https://openalex.org/W1980667043","https://openalex.org/W2044409366","https://openalex.org/W2494722835"],"abstract_inverted_index":{"Sparse":[0],"Matrix-Vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4],"a":[5,57,113,145],"key":[6],"operation":[7],"in":[8,21,190],"engineering":[9],"and":[10,29,49,171,179,203,213],"scientific":[11],"computing.":[12],"Although":[13],"the":[14,34,69,80,85,95,107,149],"previous":[15],"work":[16],"has":[17,144,214],"shown":[18],"impressive":[19],"progress":[20],"optimizing":[22],"SpMV":[23,59,151],"on":[24,165,167,177,184,186,201,208,210,220],"many-core":[25,52],"architectures,":[26],"load":[27,108],"imbalance":[28,109],"high":[30],"memory":[31],"bandwidth":[32,86],"remain":[33],"critical":[35],"performance":[36,216],"bottlenecks.":[37],"We":[38,88],"present":[39],"our":[40,141,156,193],"novel":[41],"solutions":[42],"to":[43,78,83,105,133],"these":[44],"problems,":[45],"for":[46,100,120],"both":[47],"GPUs":[48],"Intel":[50,221],"MIC":[51],"architectures.":[53],"First,":[54],"we":[55,111,128],"devise":[56],"new":[58],"format,":[60],"called":[61],"Blocked":[62],"Compressed":[63],"Common":[64,71],"Coordinate":[65,72],"(BCCOO).":[66],"BCCOO":[67],"extends":[68],"blocked":[70],"(COO)":[73],"by":[74,93,163,175,182,199,206],"using":[75],"bit":[76],"flags":[77],"store":[79],"row":[81],"indices":[82],"alleviate":[84],"problem.":[87],"further":[89],"improve":[90],"this":[91],"format":[92,162],"partitioning":[94],"matrix":[96],"into":[97],"vertical":[98],"slices":[99],"better":[101],"data":[102],"locality.":[103],"Then,":[104],"address":[106],"problem,":[110],"propose":[112],"highly":[114],"efficient":[115],"matrix-based":[116],"segmented":[117],"sum/scan":[118],"algorithm":[119],"SpMV,":[121],"which":[122],"eliminates":[123],"global":[124],"synchronization.":[125],"At":[126],"last,":[127],"introduce":[129],"an":[130],"autotuning":[131],"framework":[132,143],"choose":[134],"optimization":[135],"parameters.":[136],"Experimental":[137],"results":[138],"show":[139],"that":[140],"proposed":[142,157,194],"significant":[146],"advantage":[147],"over":[148],"existing":[150],"libraries.":[152],"In":[153],"single":[154],"precision,":[155,192],"scheme":[158,195],"outperforms":[159,172,180,196,204],"clSpMV":[160],"COCKTAIL":[161],"255%":[164],"average":[166,178,185,202,209],"AMD":[168],"FirePro":[169],"W8000,":[170],"CUSPARSE":[173,197],"V7.0":[174,198],"73.7%":[176],"CSR5":[181,205,219],"53.6%":[183],"GeForce":[187],"Titan":[188],"X;":[189],"double":[191],"34.0%":[200],"16.2%":[207],"Tesla":[211],"K20,":[212],"equivalent":[215],"compared":[217],"with":[218],"MIC.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
