{"id":"https://openalex.org/W2751387202","doi":"https://doi.org/10.1109/icpp.2017.38","title":"Performance Analysis and Optimization of Sparse Matrix-Vector Multiplication on Modern Multi- and Many-Core Processors","display_name":"Performance Analysis and Optimization of Sparse Matrix-Vector Multiplication on Modern Multi- and Many-Core Processors","publication_year":2017,"publication_date":"2017-08-01","ids":{"openalex":"https://openalex.org/W2751387202","doi":"https://doi.org/10.1109/icpp.2017.38","mag":"2751387202"},"language":"en","primary_location":{"id":"doi:10.1109/icpp.2017.38","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpp.2017.38","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 46th International Conference on Parallel Processing (ICPP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1711.05487","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062532056","display_name":"Athena Elafrou","orcid":null},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Athena Elafrou","raw_affiliation_strings":["School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023870696","display_name":"Georgios Goumas","orcid":"https://orcid.org/0000-0001-7811-4831"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Georgios Goumas","raw_affiliation_strings":["School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023526161","display_name":"Nectarios Koziris","orcid":"https://orcid.org/0000-0002-4890-8427"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Nectarios Koziris","raw_affiliation_strings":["School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062532056"],"corresponding_institution_ids":["https://openalex.org/I174458059"],"apc_list":null,"apc_paid":null,"fwci":3.4731,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.94169468,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"292","last_page":"301"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8172770142555237},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7734547853469849},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7331666946411133},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7189670205116272},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5626799464225769},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.54005366563797},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.455009400844574},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.44817012548446655},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.44233018159866333},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.435558944940567},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.13795828819274902},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.09626802802085876},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07934045791625977}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8172770142555237},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7734547853469849},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7331666946411133},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7189670205116272},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5626799464225769},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.54005366563797},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.455009400844574},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.44817012548446655},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.44233018159866333},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.435558944940567},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.13795828819274902},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.09626802802085876},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07934045791625977},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icpp.2017.38","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpp.2017.38","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 46th International Conference on Parallel Processing (ICPP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1711.05487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1711.05487","pdf_url":"https://arxiv.org/pdf/1711.05487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1711.05487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1711.05487","pdf_url":"https://arxiv.org/pdf/1711.05487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1554130893","https://openalex.org/W1563653928","https://openalex.org/W1884140786","https://openalex.org/W1965034778","https://openalex.org/W1965551736","https://openalex.org/W1981557297","https://openalex.org/W1981902599","https://openalex.org/W1982733412","https://openalex.org/W1987393840","https://openalex.org/W1988311079","https://openalex.org/W1988888548","https://openalex.org/W2002555321","https://openalex.org/W2003117296","https://openalex.org/W2006412395","https://openalex.org/W2009654791","https://openalex.org/W2023930909","https://openalex.org/W2035080386","https://openalex.org/W2072806558","https://openalex.org/W2082238959","https://openalex.org/W2088866486","https://openalex.org/W2099625934","https://openalex.org/W2101234009","https://openalex.org/W2101511474","https://openalex.org/W2103877122","https://openalex.org/W2110404184","https://openalex.org/W2111667319","https://openalex.org/W2114977680","https://openalex.org/W2126004407","https://openalex.org/W2128539477","https://openalex.org/W2130289795","https://openalex.org/W2136324023","https://openalex.org/W4235776187","https://openalex.org/W4243205343","https://openalex.org/W4243261006","https://openalex.org/W4247995867","https://openalex.org/W6639344521"],"related_works":["https://openalex.org/W1980322368","https://openalex.org/W3099313426","https://openalex.org/W2096357811","https://openalex.org/W4287593139","https://openalex.org/W752783541","https://openalex.org/W1506547947","https://openalex.org/W2072005592","https://openalex.org/W4206811032","https://openalex.org/W2995605830","https://openalex.org/W2086123442"],"abstract_inverted_index":{"This":[0,31],"paper":[1],"presents":[2],"a":[3,61,152],"low-overhead":[4],"optimizer":[5,35,128],"for":[6,60,146],"the":[7,55,66,92,147,161,170,174],"ubiquitous":[8],"sparse":[9,25,63,99],"matrix-vector":[10],"multiplication":[11],"(SpMV)":[12],"kernel.":[13],"Architectural":[14],"diversity":[15,22],"among":[16,23],"different":[17,24],"processors":[18],"together":[19],"with":[20],"structural":[21],"matrices":[26,150],"lead":[27],"to":[28,82,140,157],"bottleneck":[29],"diversity.":[30],"justifies":[32],"an":[33,50],"SpMV":[34,59,145,166],"that":[36,52,136],"is":[37,89,123,138],"both":[38],"matrix-":[39],"and":[40,77,103,120,134,142,163],"architecture-adaptive":[41],"through":[42,70],"runtime":[43],"specialization.":[44],"To":[45],"this":[46],"direction,":[47],"we":[48],"present":[49],"approach":[51,111],"first":[53],"identifies":[54],"performance":[56],"bottlenecks":[57],"of":[58,149,173],"given":[62],"matrix":[64,74,100],"on":[65,91,129],"target":[67],"platform":[68],"either":[69],"profiling":[71],"or":[72],"by":[73],"property":[75],"inspection,":[76],"then":[78],"selects":[79],"suitable":[80],"optimizations":[81],"tackle":[83],"those":[84],"bottlenecks.":[85],"Our":[86],"optimization":[87,121],"pool":[88],"based":[90],"widely":[93],"used":[94],"Compressed":[95],"Sparse":[96],"Row":[97],"(CSR)":[98],"storage":[101],"format":[102],"has":[104],"low":[105],"preprocessing":[106],"overheads,":[107],"making":[108,119],"our":[109,127],"overall":[110],"practical":[112],"even":[113],"in":[114,151,169],"cases":[115],"where":[116],"fast":[117],"decision":[118],"setup":[122],"required.":[124],"We":[125],"evaluate":[126],"three":[130],"x86-based":[131],"computing":[132],"platforms":[133],"demonstrate":[135],"it":[137],"able":[139],"distinguish":[141],"appropriately":[143],"optimize":[144],"majority":[148],"representative":[153],"test":[154],"suite,":[155],"leading":[156],"significant":[158],"speedups":[159],"over":[160],"CSR":[162,165],"Inspector-Executor":[164],"kernels":[167],"available":[168],"latest":[171],"release":[172],"Intel":[175],"MKL":[176],"library.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
