{"id":"https://openalex.org/W2535419693","doi":"https://doi.org/10.1145/2966986.2966987","title":"A data locality-aware design framework for reconfigurable sparse matrix-vector multiplication kernel","display_name":"A data locality-aware design framework for reconfigurable sparse matrix-vector multiplication kernel","publication_year":2016,"publication_date":"2016-10-18","ids":{"openalex":"https://openalex.org/W2535419693","doi":"https://doi.org/10.1145/2966986.2966987","mag":"2535419693"},"language":"en","primary_location":{"id":"doi:10.1145/2966986.2966987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2966986.2966987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th International Conference on Computer-Aided Design","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100728889","display_name":"Sicheng Li","orcid":"https://orcid.org/0000-0002-5856-1172"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sicheng Li","raw_affiliation_strings":["University of Pittsburgh","University of Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"University of Pittsburgh, United States","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103267478","display_name":"Yandan Wang","orcid":"https://orcid.org/0000-0002-0144-010X"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yandan Wang","raw_affiliation_strings":["Tsinghua University","University of Pittsburgh","University of Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"University of Pittsburgh, United States","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067226050","display_name":"Wujie Wen","orcid":"https://orcid.org/0000-0003-0011-0675"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wujie Wen","raw_affiliation_strings":["Florida International University","Florida International University, United States"],"affiliations":[{"raw_affiliation_string":"Florida International University","institution_ids":["https://openalex.org/I19700959"]},{"raw_affiliation_string":"Florida International University, United States","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100445061","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-6108-5157"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University","University of Pittsburgh","Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["University of Pittsburgh","University of Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"University of Pittsburgh, United States","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai Li","raw_affiliation_strings":["University of Pittsburgh","University of Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"University of Pittsburgh, United States","institution_ids":["https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100728889"],"corresponding_institution_ids":["https://openalex.org/I170201317"],"apc_list":null,"apc_paid":null,"fwci":0.6307,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.68403235,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8535428643226624},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8125494122505188},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6741860508918762},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6015027761459351},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.6007394790649414},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5424911975860596},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5236089825630188},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.46187424659729004},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.46110138297080994}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8535428643226624},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8125494122505188},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6741860508918762},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6015027761459351},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.6007394790649414},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5424911975860596},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5236089825630188},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.46187424659729004},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.46110138297080994},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2966986.2966987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2966986.2966987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th International Conference on Computer-Aided Design","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.9100000262260437}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W182691100","https://openalex.org/W1537260300","https://openalex.org/W1969102784","https://openalex.org/W1996500460","https://openalex.org/W2004455575","https://openalex.org/W2022819055","https://openalex.org/W2035080386","https://openalex.org/W2043754576","https://openalex.org/W2081874478","https://openalex.org/W2093053744","https://openalex.org/W2100327879","https://openalex.org/W2113921339","https://openalex.org/W2142496304","https://openalex.org/W2147634832","https://openalex.org/W2148570406","https://openalex.org/W2152567198","https://openalex.org/W2157244885","https://openalex.org/W4245683599"],"related_works":["https://openalex.org/W2052993554","https://openalex.org/W2046125858","https://openalex.org/W2293771254","https://openalex.org/W2886568922","https://openalex.org/W2039875226","https://openalex.org/W4221142455","https://openalex.org/W3121828480","https://openalex.org/W2914631005","https://openalex.org/W2123154672","https://openalex.org/W2032786851"],"abstract_inverted_index":{"Sparse":[0],"matrix-vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4,41,58,113,152],"an":[5,145],"important":[6],"computational":[7,36,147],"kernel":[8,57,125,165],"in":[9,91,184],"many":[10],"applications.":[11],"For":[12],"performance":[13,47],"improvement,":[14],"software":[15,96],"libraries":[16,40],"designated":[17],"for":[18,27,32,81],"SpMV":[19,56,83,124],"computation":[20,118],"have":[21],"been":[22],"introduced,":[23],"e.g.,":[24],"MKL":[25],"library":[26,31],"CPUs":[28],"and":[29,66,103,129,160,172,189,197],"cuSPARSE":[30],"GPUs.":[33],"However,":[34],"the":[35,44,53,62,88,100,117,122,131,135,193],"throughput":[37],"of":[38,55,110,137,149,158],"these":[39],"far":[42],"below":[43],"peak":[45],"floating-point":[46],"offered":[48],"by":[49,61,133],"hardware":[50,89],"platforms,":[51],"because":[52],"efficiency":[54,148],"greatly":[59],"constrained":[60],"limited":[63],"memory":[64,101],"bandwidth":[65],"irregular":[67],"data":[68,77],"access":[69],"patterns.":[70],"In":[71],"this":[72],"work,":[73],"we":[74],"propose":[75],"a":[76,106,153,167],"locality-aware":[78],"design":[79,180],"framework":[80],"FPGA-based":[82,164],"acceleration.":[84],"We":[85,120],"first":[86],"include":[87],"constraints":[90],"sparse":[92,139],"matrix":[93,140],"compression":[94],"at":[95],"level":[97],"to":[98,115],"regularize":[99],"allocation":[102],"accesses.":[104],"Moreover,":[105,178],"distributed":[107],"architecture":[108],"composed":[109],"processing":[111],"elements":[112],"developed":[114],"improve":[116],"parallelism.":[119],"implement":[121],"reconfigurable":[123],"on":[126,195],"Convey":[127],"HC-2<sup>ex</sup>":[128],"conduct":[130],"evaluation":[132],"using":[134],"University":[136],"Florida":[138],"collection.":[141],"The":[142],"experiments":[143],"demonstrate":[144],"average":[146],"48.2%,":[150],"which":[151],"lot":[154],"better":[155,191],"than":[156,176,192],"those":[157],"CPU":[159,196],"GPU":[161],"implementations.":[162],"Our":[163],"has":[166],"comparable":[168],"runtime":[169],"as":[170],"GPU,":[171,198],"achieves":[173],"2.1&#x00D7;":[174],"reduction":[175],"CPU.":[177],"our":[179],"obtains":[181],"substantial":[182],"saving":[183],"energy":[185],"consumption,":[186],"say,":[187],"9.3&#x00D7;":[188],"5.6&#x00D7;":[190],"implementations":[194],"respectively.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
