{"id":"https://openalex.org/W3202552726","doi":"https://doi.org/10.1145/3472456.3472479","title":"Regu2D: Accelerating Vectorization of SpMV on Intel Processors through 2D-partitioning and Regular Arrangement","display_name":"Regu2D: Accelerating Vectorization of SpMV on Intel Processors through 2D-partitioning and Regular Arrangement","publication_year":2021,"publication_date":"2021-08-09","ids":{"openalex":"https://openalex.org/W3202552726","doi":"https://doi.org/10.1145/3472456.3472479","mag":"3202552726"},"language":"en","primary_location":{"id":"doi:10.1145/3472456.3472479","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3472479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090837004","display_name":"Fei Xiang","orcid":"https://orcid.org/0000-0003-3644-7114"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiang Fei","raw_affiliation_strings":["Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016596981","display_name":"Youhui Zhang","orcid":"https://orcid.org/0000-0003-2333-3580"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youhui Zhang","raw_affiliation_strings":["Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5090837004"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.3817,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.80467024,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8395649790763855},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8198502659797668},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8007291555404663},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7034800052642822},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.6772007346153259},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6284857988357544},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.6282385587692261},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.6020444631576538},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5722888708114624},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5264578461647034},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5201854705810547},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.44428056478500366},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3767608404159546},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12697046995162964}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8395649790763855},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8198502659797668},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8007291555404663},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7034800052642822},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.6772007346153259},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6284857988357544},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.6282385587692261},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.6020444631576538},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5722888708114624},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5264578461647034},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5201854705810547},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.44428056478500366},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3767608404159546},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12697046995162964},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472456.3472479","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3472479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1506342804","https://openalex.org/W1563554946","https://openalex.org/W1653630692","https://openalex.org/W1965551736","https://openalex.org/W1990832096","https://openalex.org/W2009654791","https://openalex.org/W2035080386","https://openalex.org/W2101511474","https://openalex.org/W2126004407","https://openalex.org/W2128853364","https://openalex.org/W2154111453","https://openalex.org/W2769686377","https://openalex.org/W2788264912","https://openalex.org/W2790482734","https://openalex.org/W2791012218","https://openalex.org/W2908185957","https://openalex.org/W3096209535","https://openalex.org/W3158543914","https://openalex.org/W4243205343","https://openalex.org/W4407740500"],"related_works":["https://openalex.org/W2916039158","https://openalex.org/W2947212999","https://openalex.org/W947442053","https://openalex.org/W2613115449","https://openalex.org/W2800704601","https://openalex.org/W1985658314","https://openalex.org/W3202552726","https://openalex.org/W2887161653","https://openalex.org/W3175030460","https://openalex.org/W2020484966"],"abstract_inverted_index":{"Sparse":[0,127],"matrix-vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4,16],"an":[5,135,163],"elementary":[6],"kernel":[7],"of":[8,19,23,51,56,77,87,125,138,166],"many":[9],"high-performance":[10],"computing":[11],"(HPC)":[12],"applications,":[13],"and":[14,41,54,90,118,142,148,170],"it":[15],"often":[17],"one":[18],"the":[20,42,52,84,100,123],"performance":[21],"bottlenecks":[22],"them.":[24],"Accelerating":[25],"SpMV":[26,78],"on":[27,47,107],"vector":[28,44],"processors":[29,110],"usually":[30],"faces":[31],"several":[32],"issues":[33],"including":[34],"irregular":[35],"data":[36],"accesses,":[37],"memory":[38],"bandwidth":[39],"limitation,":[40],"short":[43],"problem.":[45],"Based":[46],"a":[48,71],"detailed":[49],"analysis":[50],"effects":[53],"interactions":[55],"various":[57],"technologies":[58],"introduced":[59],"by":[60],"state-of-the-art":[61],"studies":[62],"(ALBUS,":[63],"CVR,":[64,146],"CSR5,":[65,147],"SELL-C-\u03c3":[66,149],"etc.),":[67],"we":[68],"propose":[69],"Regu2D,":[70],"comprehensive":[72],"solution":[73],"to":[74,98],"accelerate":[75],"vectorization":[76],"through":[79],"three":[80],"methods:":[81],"adaptive":[82],"2D-partitioning,":[83],"regular":[85],"arrangement":[86],"matrix":[88],"elements,":[89],"indices":[91],"compression.":[92],"Dynamic":[93],"programming":[94],"algorithms":[95],"are":[96],"used":[97],"optimize":[99],"first":[101],"two":[102],"methods.":[103],"We":[104],"conduct":[105],"experiments":[106],"Intel":[108],"Xeon":[109],"(Skylake":[111],"architecture)":[112],"which":[113],"support":[114],"AVX-512":[115],"SIMD":[116],"instructions":[117],"use":[119],"sparse":[120,153,159],"matrices":[121],"from":[122],"University":[124],"Florida":[126],"Matrix":[128],"Collection.":[129],"Experiments":[130],"show":[131],"that":[132],"Regu2D":[133,161],"achieves":[134,162],"average":[136,164],"speedup":[137,165],"1.69X,":[139],"1.93X,":[140],"1.40X,":[141],"1.20X":[143],"over":[144,172],"ALBUS,":[145],"for":[150],"30":[151],"scale-free":[152],"matrices,":[154,160],"respectively.":[155,174],"For":[156],"16":[157],"HPC":[158],"1.34X,":[167,169],"1.89X,":[168],"1.50X":[171],"them,":[173]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
