{"id":"https://openalex.org/W2766681651","doi":"https://doi.org/10.1109/hpec.2017.8091096","title":"Algorithm and hardware co-optimized solution for large SpMV problems","display_name":"Algorithm and hardware co-optimized solution for large SpMV problems","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2766681651","doi":"https://doi.org/10.1109/hpec.2017.8091096","mag":"2766681651"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2017.8091096","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013781918","display_name":"Fazle Sadi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fazle Sadi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004725890","display_name":"Larry Fileggi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Larry Fileggi","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, US"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, US","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062806943","display_name":"Franz Franchetti","orcid":"https://orcid.org/0000-0002-3529-8973"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Franz Franchetti","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5013781918"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.3519,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.82997976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8632521033287048},{"id":"https://openalex.org/keywords/application-specific-integrated-circuit","display_name":"Application-specific integrated circuit","score":0.5941383242607117},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5455663800239563},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5120068192481995},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.49658066034317017},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.43905436992645264},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3660646378993988},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3432604670524597},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.32636138796806335}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8632521033287048},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.5941383242607117},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5455663800239563},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5120068192481995},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.49658066034317017},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.43905436992645264},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3660646378993988},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3432604670524597},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.32636138796806335}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2017.8091096","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8999999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W131619556","https://openalex.org/W1562405179","https://openalex.org/W1948792255","https://openalex.org/W2013435193","https://openalex.org/W2025890876","https://openalex.org/W2028441625","https://openalex.org/W2035080386","https://openalex.org/W2038142281","https://openalex.org/W2075960179","https://openalex.org/W2088039427","https://openalex.org/W2095836023","https://openalex.org/W2103877122","https://openalex.org/W2114378634","https://openalex.org/W2128539477","https://openalex.org/W2128853364","https://openalex.org/W2148570406","https://openalex.org/W2150909864","https://openalex.org/W2163501979","https://openalex.org/W2167868137","https://openalex.org/W2169783318","https://openalex.org/W2270781678","https://openalex.org/W2534648526","https://openalex.org/W3146763006","https://openalex.org/W4243261006","https://openalex.org/W4249654426","https://openalex.org/W6633534558","https://openalex.org/W6682164510","https://openalex.org/W6684509964"],"related_works":["https://openalex.org/W2165367082","https://openalex.org/W4234886518","https://openalex.org/W1972641423","https://openalex.org/W611446063","https://openalex.org/W2389591058","https://openalex.org/W2382112581","https://openalex.org/W2077105843","https://openalex.org/W2770465587","https://openalex.org/W2086716781","https://openalex.org/W2022510519"],"abstract_inverted_index":{"Sparse":[0],"Matrix-Vector":[1],"multiplication":[2],"(SpMV)":[3],"is":[4,98],"a":[5,108],"fundamental":[6],"kernel":[7],"for":[8,49,64,85,120,168],"many":[9],"scientific":[10],"and":[11,17,172],"engineering":[12],"applications.":[13],"However,":[14,88],"SpMV":[15,51,66],"performance":[16],"efficiency":[18,171],"are":[19],"poor":[20],"on":[21],"commercial":[22,166],"of-the-shelf":[23],"(COTS)":[24],"architectures,":[25],"specially":[26],"when":[27],"the":[28,57,74,89,121,131,143,149],"data":[29,61,78],"size":[30],"exceeds":[31],"on-chip":[32],"memory":[33,83],"or":[34],"last":[35],"level":[36],"cache":[37],"(LLC).":[38],"In":[39],"this":[40],"work":[41],"we":[42,106],"present":[43],"an":[44,70,93,114],"algorithm":[45,71,91],"co-optimized":[46],"hardware":[47,109],"accelerator":[48,110,127],"large":[50],"problems.":[52],"We":[53,68],"start":[54],"with":[55,102,148,165],"exploring":[56],"basic":[58],"difference":[59],"in":[60,139],"transfer":[62,79],"characteristics":[63],"various":[65],"algorithms.":[67],"propose":[69,107],"that":[72,112],"requires":[73,92],"least":[75],"amount":[76],"of":[77,130,159],"while":[80],"ensuring":[81],"main":[82],"streaming":[84],"all":[86],"accesses.":[87],"proposed":[90,126,144],"efficient":[94],"multi-way":[95],"merge,":[96],"which":[97],"difficult":[99],"to":[100,141],"achieve":[101],"COTS":[103,163],"architectures.":[104],"Hence,":[105],"model":[111],"includes":[113],"Application":[115],"Specific":[116],"Integrated":[117],"Circuit":[118],"(ASIC)":[119],"muti-way":[122],"merge":[123],"operation.":[124],"The":[125],"incorporates":[128],"state":[129],"art":[132],"3D":[133],"stacked":[134],"High":[135],"Bandwidth":[136],"Memory":[137],"(HBM)":[138],"order":[140],"demonstrate":[142],"algorithm's":[145],"capability":[146],"coupled":[147],"latest":[150],"technologies.":[151],"Simulation":[152],"results":[153],"using":[154],"standard":[155],"benchmarks":[156],"show":[157],"improvements":[158],"over":[160],"100\u00d7":[161],"against":[162],"architectures":[164],"libraries":[167],"both":[169],"energy":[170],"performance.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
