{"id":"https://openalex.org/W3139495005","doi":"https://doi.org/10.1109/cgo51591.2021.9370327","title":"Variable-Sized Blocks for Locality-Aware SpMV","display_name":"Variable-Sized Blocks for Locality-Aware SpMV","publication_year":2021,"publication_date":"2021-02-27","ids":{"openalex":"https://openalex.org/W3139495005","doi":"https://doi.org/10.1109/cgo51591.2021.9370327","mag":"3139495005"},"language":"en","primary_location":{"id":"doi:10.1109/cgo51591.2021.9370327","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370327","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086236412","display_name":"Naveen Namashivavam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122178","display_name":"Hewlett Packard Enterprise (United States)","ror":"https://ror.org/020x0c621","country_code":"US","type":"company","lineage":["https://openalex.org/I4210122178"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Naveen Namashivavam","raw_affiliation_strings":["Hewlett Packard Enterprise, USA"],"affiliations":[{"raw_affiliation_string":"Hewlett Packard Enterprise, USA","institution_ids":["https://openalex.org/I4210122178"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102742730","display_name":"Sanyam Mehta","orcid":"https://orcid.org/0009-0005-5319-689X"},"institutions":[{"id":"https://openalex.org/I4210122178","display_name":"Hewlett Packard Enterprise (United States)","ror":"https://ror.org/020x0c621","country_code":"US","type":"company","lineage":["https://openalex.org/I4210122178"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanyam Mehta","raw_affiliation_strings":["Hewlett Packard Enterprise, USA"],"affiliations":[{"raw_affiliation_string":"Hewlett Packard Enterprise, USA","institution_ids":["https://openalex.org/I4210122178"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052005800","display_name":"Pen-Chung Yew","orcid":"https://orcid.org/0000-0001-9653-8777"},"institutions":[{"id":"https://openalex.org/I4210101327","display_name":"Twin Cities Orthopedics","ror":"https://ror.org/01en4s460","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210101327"]},{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pen-Chung Yew","raw_affiliation_strings":["University of Minnesota-Twin Cities, USA"],"affiliations":[{"raw_affiliation_string":"University of Minnesota-Twin Cities, USA","institution_ids":["https://openalex.org/I4210101327","https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086236412"],"corresponding_institution_ids":["https://openalex.org/I4210122178"],"apc_list":null,"apc_paid":null,"fwci":2.5331,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.89271063,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"211","last_page":"221"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8159079551696777},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7938724756240845},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6761735677719116},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.645136296749115},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6120871305465698},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5742425322532654},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.5729421377182007},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5435996651649475},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5141001343727112},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.448652982711792},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.4316326379776001},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.42930108308792114},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.41798636317253113},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.41086846590042114},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33717626333236694},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1600261628627777},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14507240056991577}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8159079551696777},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7938724756240845},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6761735677719116},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.645136296749115},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6120871305465698},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5742425322532654},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.5729421377182007},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5435996651649475},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5141001343727112},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.448652982711792},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.4316326379776001},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.42930108308792114},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.41798636317253113},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.41086846590042114},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33717626333236694},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1600261628627777},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14507240056991577},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cgo51591.2021.9370327","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370327","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W182691100","https://openalex.org/W1506342804","https://openalex.org/W1588915715","https://openalex.org/W1884140786","https://openalex.org/W1965551736","https://openalex.org/W1971343078","https://openalex.org/W1975116854","https://openalex.org/W1981902599","https://openalex.org/W1987840949","https://openalex.org/W1990832096","https://openalex.org/W1993704253","https://openalex.org/W2009215380","https://openalex.org/W2009654791","https://openalex.org/W2035080386","https://openalex.org/W2078095679","https://openalex.org/W2084296319","https://openalex.org/W2101511474","https://openalex.org/W2111667319","https://openalex.org/W2121082877","https://openalex.org/W2128853364","https://openalex.org/W2130289795","https://openalex.org/W2140153041","https://openalex.org/W2151285624","https://openalex.org/W2162283062","https://openalex.org/W2162630236","https://openalex.org/W2518567779","https://openalex.org/W2603231629","https://openalex.org/W2755088640","https://openalex.org/W2782176700","https://openalex.org/W2791012218","https://openalex.org/W4243205343","https://openalex.org/W6678286823","https://openalex.org/W6735889570"],"related_works":["https://openalex.org/W2214459866","https://openalex.org/W2100952904","https://openalex.org/W1507301366","https://openalex.org/W3199219623","https://openalex.org/W4224226906","https://openalex.org/W2620569691","https://openalex.org/W1982324446","https://openalex.org/W3202552726","https://openalex.org/W2170268965","https://openalex.org/W2536676298"],"abstract_inverted_index":{"Blocking":[0],"is":[1],"an":[2,34,131,138],"important":[3],"optimization":[4],"option":[5],"available":[6],"to":[7,37,71,128],"mitigate":[8],"the":[9,15,39,50,55,61,102,143],"data":[10,92],"movement":[11],"overhead":[12],"and":[13,135,157],"improve":[14],"temporal":[16],"locality":[17],"in":[18,54,73,149],"SpMV,":[19],"a":[20,59,88,124],"sparse":[21,46,56],"BLAS":[22],"kernel":[23],"with":[24,107],"irregular":[25,45],"memory":[26],"reference":[27],"pattern.":[28],"In":[29],"this":[30],"work,":[31],"we":[32],"propose":[33],"analytical":[35],"model":[36],"determine":[38],"effective":[40],"block":[41],"size":[42],"for":[43],"highly":[44,144],"matrices":[47,114],"by":[48,64],"factoring":[49],"distribution":[51],"of":[52,104,113,126,133,140],"non-zeros":[53],"dataset.":[57],"As":[58],"result,":[60],"blocks":[62,109],"generated":[63],"our":[65,80],"scheme":[66,82],"are":[67],"variable-sized":[68,108],"as":[69],"opposed":[70],"constant-sized":[72],"most":[74],"existing":[75],"SpMV":[76,106,147],"algorithms.":[77],"We":[78,100],"demonstrate":[79],"blocking":[81],"using":[83,110],"Compressed":[84],"Vector":[85],"Blocks":[86],"(CVB),":[87],"new":[89],"column-based":[90],"blocked":[91],"format,":[93],"on":[94,155],"Intel":[95,159],"Xeon":[96,160],"Skylake-X":[97],"multicore":[98],"processor.":[99],"evaluated":[101],"performance":[103],"CVB-based":[105],"extensive":[111],"set":[112],"from":[115],"Stanford":[116],"Network":[117],"Analysis":[118],"Platform":[119],"(SNAP).":[120],"Our":[121],"evaluation":[122],"shows":[123],"speedup":[125],"up":[127],"2.62X":[129],"(with":[130,137],"average":[132,139],"1.73X)":[134],"2.02X":[136],"1.18X)":[141],"over":[142],"vendor":[145],"tuned":[146],"implementation":[148],"Intel's":[150],"Math":[151],"Kernel":[152],"Library":[153],"(MKL)":[154],"single":[156],"multiple":[158],"cores":[161],"respectively.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
