{"id":"https://openalex.org/W2130304700","doi":"https://doi.org/10.1145/2304576.2304603","title":"Sparse matrix-vector multiply on the HICAMP architecture","display_name":"Sparse matrix-vector multiply on the HICAMP architecture","publication_year":2012,"publication_date":"2012-06-25","ids":{"openalex":"https://openalex.org/W2130304700","doi":"https://doi.org/10.1145/2304576.2304603","mag":"2130304700"},"language":"en","primary_location":{"id":"doi:10.1145/2304576.2304603","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2304576.2304603","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060491756","display_name":"John P. Stevenson","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]},{"id":"https://openalex.org/I1743320","display_name":"Palo Alto University","ror":"https://ror.org/04f812k67","country_code":"US","type":"education","lineage":["https://openalex.org/I1743320"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"John P. Stevenson","raw_affiliation_strings":["Stanford University, Palo Alto, CA, USA","Stanford University, Palo Alto, CA USA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"]},{"raw_affiliation_string":"Stanford University, Palo Alto, CA USA","institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033963442","display_name":"Amin Firoozshahian","orcid":"https://orcid.org/0009-0009-0128-298X"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amin Firoozshahian","raw_affiliation_strings":["HICAMP Systems, Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"HICAMP Systems, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000964076","display_name":"Alex Solomatnikov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Solomatnikov","raw_affiliation_strings":["HICAMP Systems, Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"HICAMP Systems, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090469068","display_name":"Mark Horowitz","orcid":"https://orcid.org/0000-0003-3245-7542"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]},{"id":"https://openalex.org/I1743320","display_name":"Palo Alto University","ror":"https://ror.org/04f812k67","country_code":"US","type":"education","lineage":["https://openalex.org/I1743320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Horowitz","raw_affiliation_strings":["Stanford University, Palo Alto, CA, USA","Stanford University, Palo Alto, CA USA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"]},{"raw_affiliation_string":"Stanford University, Palo Alto, CA USA","institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032416534","display_name":"David R. Cheriton","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Cheriton","raw_affiliation_strings":["Stanford University &amp; HICAMP Systems, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Stanford University &amp; HICAMP Systems, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5060491756"],"corresponding_institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":1.4503,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.825089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"195","last_page":"204"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8394185304641724},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.757021427154541},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5699279308319092},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.44425442814826965},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.4367595314979553},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.43167775869369507},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.42176929116249084},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3293219804763794},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3254573345184326}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8394185304641724},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.757021427154541},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5699279308319092},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.44425442814826965},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.4367595314979553},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.43167775869369507},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.42176929116249084},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3293219804763794},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3254573345184326},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2304576.2304603","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2304576.2304603","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1961751213","https://openalex.org/W1965551736","https://openalex.org/W1987393840","https://openalex.org/W1988311079","https://openalex.org/W1990832096","https://openalex.org/W2029108714","https://openalex.org/W2029663554","https://openalex.org/W2035080386","https://openalex.org/W2038509324","https://openalex.org/W2072806558","https://openalex.org/W2092013581","https://openalex.org/W2097964935","https://openalex.org/W2099625934","https://openalex.org/W2104120668","https://openalex.org/W2111667319","https://openalex.org/W2113921339","https://openalex.org/W2126004407","https://openalex.org/W2128853364","https://openalex.org/W2130289795","https://openalex.org/W2136324023","https://openalex.org/W2146530035","https://openalex.org/W2163027735","https://openalex.org/W2171697422","https://openalex.org/W3103104567"],"related_works":["https://openalex.org/W2384475851","https://openalex.org/W2000444236","https://openalex.org/W2353602216","https://openalex.org/W1541167181","https://openalex.org/W4389888442","https://openalex.org/W2149529325","https://openalex.org/W3103168911","https://openalex.org/W4308090217","https://openalex.org/W1981252059","https://openalex.org/W2120768584"],"abstract_inverted_index":{"Sparse":[0],"matrix-vector":[1],"multiply":[2],"(SpMV)":[3],"is":[4,31,94,144],"a":[5,82,114],"critical":[6],"task":[7],"in":[8,75],"the":[9,37,76,96,157],"inner":[10],"loop":[11],"of":[12,40,90,106,133],"modern":[13],"iterative":[14],"linear":[15],"system":[16],"solvers":[17],"and":[18,66,93,122],"exhibits":[19],"very":[20],"little":[21],"data":[22],"reuse.":[23],"This":[24,129],"low":[25],"reuse":[26],"means":[27],"that":[28,85],"its":[29],"performance":[30,71],"bounded":[32],"by":[33,150,156],"main-memory":[34],"bandwidth.":[35],"Moreover,":[36],"random":[38],"patterns":[39],"indirection":[41],"make":[42],"it":[43],"difficult":[44],"to":[45,146],"achieve":[46],"this":[47,148],"bound.":[48],"We":[49],"present":[50],"sparse":[51],"matrix":[52,83,91,103,116],"storage":[53],"formats":[54,60],"based":[55],"on":[56,113],"deduplicated":[57],"memory.":[58],"These":[59],"reduce":[61],"memory":[62],"traffic":[63],"during":[64],"SpMV":[65],"thus":[67],"show":[68],"significantly":[69],"improved":[70],"bounds:":[72],"90x":[73],"better":[74],"best":[77],"case.":[78],"Additionally,":[79],"we":[80],"introduce":[81],"format":[84],"inherently":[86],"exploits":[87],"any":[88,124],"amount":[89],"symmetry":[92],"at":[95],"same":[97],"time":[98],"fully":[99],"compatible":[100],"with":[101],"non-symmetric":[102],"code.":[104],"Because":[105],"this,":[107],"our":[108],"method":[109],"can":[110],"concurrently":[111],"operate":[112],"symmetric":[115],"without":[117,123],"complicated":[118],"work":[119],"partitioning":[120],"schemes":[121],"thread":[125],"synchronization":[126],"or":[127,165],"locking.":[128],"approach":[130],"takes":[131],"advantage":[132],"growing":[134],"processor":[135],"caches,":[136],"but":[137],"incurs":[138],"an":[139],"instruction":[140],"count":[141],"overhead.":[142],"It":[143],"feasible":[145],"overcome":[147],"issue":[149],"using":[151],"specialized":[152],"hardware":[153],"as":[154],"shown":[155],"recently":[158],"proposed":[159],"Hierarchical":[160],"Immutable":[161],"Content-Addressable":[162],"Memory":[163],"Processor,":[164],"HICAMP":[166],"architecture.":[167]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
