{"id":"https://openalex.org/W4415250869","doi":"https://doi.org/10.1109/hpec67600.2025.11196688","title":"Performance Analysis of the Parallel Shared-Memory Sparse Matrix-Vector Multiplication on Unstructured Matrices","display_name":"Performance Analysis of the Parallel Shared-Memory Sparse Matrix-Vector Multiplication on Unstructured Matrices","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250869","doi":"https://doi.org/10.1109/hpec67600.2025.11196688"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196688","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120024874","display_name":"Kobe Bergmans","orcid":null},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Kobe Bergmans","raw_affiliation_strings":["KU Leuven,Department of Computer Science,Belgium"],"affiliations":[{"raw_affiliation_string":"KU Leuven,Department of Computer Science,Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079388991","display_name":"Karl Meerbergen","orcid":"https://orcid.org/0000-0002-1508-0248"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Karl Meerbergen","raw_affiliation_strings":["KU Leuven,Department of Computer Science,Belgium"],"affiliations":[{"raw_affiliation_string":"KU Leuven,Department of Computer Science,Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039999347","display_name":"Raf Vandebril","orcid":"https://orcid.org/0000-0003-2119-8696"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Raf Vandebril","raw_affiliation_strings":["KU Leuven,Department of Computer Science,Belgium"],"affiliations":[{"raw_affiliation_string":"KU Leuven,Department of Computer Science,Belgium","institution_ids":["https://openalex.org/I99464096"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120024874"],"corresponding_institution_ids":["https://openalex.org/I99464096"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31514776,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.8194000124931335},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5939000248908997},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5799000263214111},{"id":"https://openalex.org/keywords/multiplication-algorithm","display_name":"Multiplication algorithm","score":0.5121999979019165},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.3337000012397766}],"concepts":[{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.8194000124931335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76419997215271},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5939000248908997},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.593500018119812},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5799000263214111},{"id":"https://openalex.org/C201290732","wikidata":"https://www.wikidata.org/wiki/Q130762","display_name":"Multiplication algorithm","level":3,"score":0.5121999979019165},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4645000100135803},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.30070000886917114},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196688","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322308","display_name":"KU Leuven","ror":"https://ror.org/05f950310"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1501021412","https://openalex.org/W1506342804","https://openalex.org/W1581507139","https://openalex.org/W1976969221","https://openalex.org/W2028045344","https://openalex.org/W2035080386","https://openalex.org/W2072605585","https://openalex.org/W2114827335","https://openalex.org/W2126004407","https://openalex.org/W2142496304","https://openalex.org/W2483318309","https://openalex.org/W2489110754","https://openalex.org/W3107316479","https://openalex.org/W4210351808","https://openalex.org/W4247828381"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"investigates":[2],"sparse":[3,11,142],"matrix-vector":[4],"(SpMV)":[5],"multiplication":[6,18,45,64,111],"algorithm":[7,19],"performance":[8],"for":[9,20,61,139],"unstructured":[10,141],"matrices.":[12],"The":[13],"development":[14],"of":[15,23,38,80,89,157,168],"an":[16],"SpMV":[17,44,63,110],"this":[21,99],"type":[22],"data":[24],"is":[25,93,132],"hard":[26],"due":[27],"to":[28,105,116],"two":[29,147,150],"factors.":[30],"First,":[31],"parallel":[32,62],"load":[33],"balancing":[34],"issues":[35],"arise":[36],"because":[37,50],"the":[39,51,81,101,114,125,158,169],"unpredictable":[40],"nonzero":[41],"structure.":[42],"Second,":[43],"algorithms":[46,60,73,92,102,112,161],"are":[47,68,74,103,137],"inevitably":[48],"memory-bound":[49],"sparsity":[52],"causes":[53],"a":[54,120],"low":[55],"arithmetic":[56],"intensity.":[57],"Three":[58,70],"state-of-the-art":[59],"on":[65,144,166],"shared-memory":[66],"systems":[67],"discussed.":[69],"new":[71],"hybrid":[72],"developed":[75,160],"which":[76],"combine":[77],"optimization":[78],"techniques":[79],"current":[82],"algorithms.":[83],"A":[84],"modern":[85],"and":[86,149],"high-performance":[87],"implementation":[88,100],"all":[90],"discussed":[91],"provided":[94],"as":[95],"open-source":[96],"software.":[97],"Using":[98],"compared":[104],"Intel\u2019s":[106],"oneMKL":[107,163],"library.":[108],"Furthermore,":[109],"require":[113],"matrix":[115],"be":[117],"stored":[118],"in":[119],"specific":[121],"storage":[122,130],"format.":[123],"Therefore,":[124],"conversion":[126],"time":[127],"between":[128],"these":[129],"formats":[131],"also":[133],"analyzed.":[134],"Both":[135],"tests":[136],"performed":[138],"multiple":[140],"matrices":[143],"different":[145],"machines:":[146],"multi-CPU":[148,170],"single-CPU":[151],"architectures.":[152,171],"We":[153],"show":[154],"that":[155],"one":[156,167],"newly":[159],"outperforms":[162],"by":[164],"40%":[165]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-16T00:00:00"}
