{"id":"https://openalex.org/W4417014601","doi":"https://doi.org/10.1016/j.parco.2025.103169","title":"Cache partitioning for sparse matrix\u2013vector multiplication on the A64FX","display_name":"Cache partitioning for sparse matrix\u2013vector multiplication on the A64FX","publication_year":2025,"publication_date":"2025-12-05","ids":{"openalex":"https://openalex.org/W4417014601","doi":"https://doi.org/10.1016/j.parco.2025.103169"},"language":"en","primary_location":{"id":"doi:10.1016/j.parco.2025.103169","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2025.103169","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.parco.2025.103169","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090581401","display_name":"Sergej Breiter","orcid":"https://orcid.org/0009-0007-7742-1427"},"institutions":[{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sergej Breiter","raw_affiliation_strings":["Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Geschwister-Scholl-Platz 1, Munich, 80539, Germany"],"raw_orcid":"https://orcid.org/0009-0007-7742-1427","affiliations":[{"raw_affiliation_string":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Geschwister-Scholl-Platz 1, Munich, 80539, Germany","institution_ids":["https://openalex.org/I8204097"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003537956","display_name":"James Trotter","orcid":"https://orcid.org/0000-0003-4498-020X"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"James D. Trotter","raw_affiliation_strings":["Simula Research Laboratory, Kristian Augusts gate 23, Oslo, 0164, Norway"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simula Research Laboratory, Kristian Augusts gate 23, Oslo, 0164, Norway","institution_ids":["https://openalex.org/I2799829267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043839305","display_name":"Karl F\u00fcrlinger","orcid":"https://orcid.org/0000-0003-0398-4087"},"institutions":[{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Karl F\u00fcrlinger","raw_affiliation_strings":["Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Geschwister-Scholl-Platz 1, Munich, 80539, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Geschwister-Scholl-Platz 1, Munich, 80539, Germany","institution_ids":["https://openalex.org/I8204097"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090581401"],"corresponding_institution_ids":["https://openalex.org/I8204097"],"apc_list":{"value":2680,"currency":"USD","value_usd":2680},"apc_paid":{"value":2680,"currency":"USD","value_usd":2680},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41737789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"127","issue":null,"first_page":"103169","last_page":"103169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8880000114440918,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8880000114440918,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.03530000150203705,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.02019999921321869,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.8564000129699707},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.8169999718666077},{"id":"https://openalex.org/keywords/smart-cache","display_name":"Smart Cache","score":0.7925000190734863},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.7714999914169312},{"id":"https://openalex.org/keywords/cache-invalidation","display_name":"Cache invalidation","score":0.7483000159263611},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.7289000153541565},{"id":"https://openalex.org/keywords/cache-oblivious-algorithm","display_name":"Cache-oblivious algorithm","score":0.7283999919891357}],"concepts":[{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.8564000129699707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8414000272750854},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.8169999718666077},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.800000011920929},{"id":"https://openalex.org/C167713795","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"Smart Cache","level":5,"score":0.7925000190734863},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.7714999914169312},{"id":"https://openalex.org/C25536678","wikidata":"https://www.wikidata.org/wiki/Q5015977","display_name":"Cache invalidation","level":5,"score":0.7483000159263611},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.7289000153541565},{"id":"https://openalex.org/C59687516","wikidata":"https://www.wikidata.org/wiki/Q5015938","display_name":"Cache-oblivious algorithm","level":5,"score":0.7283999919891357},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.6164000034332275},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.453900009393692},{"id":"https://openalex.org/C120936851","wikidata":"https://www.wikidata.org/wiki/Q1408065","display_name":"MESI protocol","level":5,"score":0.41519999504089355},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.38499999046325684},{"id":"https://openalex.org/C51185590","wikidata":"https://www.wikidata.org/wiki/Q1017228","display_name":"Bus sniffing","level":5,"score":0.34950000047683716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32019999623298645},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25870001316070557},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1016/j.parco.2025.103169","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2025.103169","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"},{"id":"pmh:oai:epub.ub.uni-muenchen.de:133094","is_oa":false,"landing_page_url":"http://nbn-resolving.de/urn:nbn:de:bvb:19-epub-133094-0","pdf_url":null,"source":{"id":"https://openalex.org/S4306401845","display_name":"Open access LMU (Ludwid Maxmilian's Universitat Munchen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I8204097","host_organization_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","host_organization_lineage":["https://openalex.org/I8204097"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Parallel Computing","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1016/j.parco.2025.103169","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2025.103169","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5244242995","display_name":null,"funder_award_id":"270053","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G7418763235","display_name":null,"funder_award_id":"16HPC045","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320323299","display_name":"Norges Forskningsr\u00e5d","ror":"https://ror.org/00epmv149"},{"id":"https://openalex.org/F4320336863","display_name":"Leibniz-Rechenzentrum","ror":"https://ror.org/05558nw16"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1867807796","https://openalex.org/W1965351873","https://openalex.org/W1976750286","https://openalex.org/W1999472284","https://openalex.org/W2021089857","https://openalex.org/W2112311198","https://openalex.org/W2164705534","https://openalex.org/W2987963343","https://openalex.org/W3035019560","https://openalex.org/W3190673640","https://openalex.org/W4205662846","https://openalex.org/W4230444879","https://openalex.org/W4242148175","https://openalex.org/W4251280768","https://openalex.org/W4284888541","https://openalex.org/W4388581083"],"related_works":[],"abstract_inverted_index":{"One":[0],"of":[1,5,20,31,46,66,105,112,130,152,162,185,194,207,232],"the":[2,6,11,21,28,48,57,87,103,123,125,128,150,169,189,192,195,230],"novel":[3],"features":[4],"Fujitsu":[7],"A64FX":[8],"CPU":[9],"is":[10,34,77,166,172],"sector":[12,49,88,118,154,196],"cache":[13,50,89,93,98,107,115,119,132,155,197,216,233],".":[14,176],"This":[15,40],"feature":[16],"enables":[17],"hardware-supported":[18],"partitioning":[19],"L1":[22],"and":[23,26,85,95,136,140,182],"L2":[24,106,131],"caches":[25,228],"allows":[27],"programmer":[29],"control":[30],"which":[32,84],"partition":[33],"used":[35,78],"to":[36,51,79,91,96],"place":[37],"data":[38],"in.":[39],"paper":[41],"performs":[42],"an":[43,110],"in-depth":[44,183],"study":[45,181],"applying":[47],"sparse":[52,68,208],"matrix-vector":[53],"multiplication":[54],"(SpMV)":[55],"in":[56,83],"Compressed":[58],"Sparse":[59],"Row":[60],"(CSR)":[61],"format":[62],"using":[63,210],"a":[64],"collection":[65],"490":[67],"matrices.":[69],"A":[70,159,178,200,215],"performance":[71,180],"model":[72,101,126,218],"based":[73,203],"on":[74,122,157,188,204,222],"reuse":[75,94,205],"analysis":[76,184,202],"better":[80],"understand":[81],"situations":[82],"how":[86],"leads":[90],"improved":[92],"predict":[97],"behavior.":[99],"The":[100],"predicts":[102,127],"number":[104,129],"misses":[108],"within":[109,134],"error":[111],"2%":[113],"without":[114],"partitioning.":[116,234],"With":[117],"enabled,":[120],"depending":[121],"configuration,":[124],"missed":[133],"2\u20133%":[135],"4\u201318%":[137],"for":[138,219],"sequential":[139],"parallel":[141,220],"SpMV":[142,187],"with":[143,225],"48":[144],"threads,":[145],"respectively.":[146],"Further":[147],"experiments":[148],"show":[149],"effect":[151,193,231],"various":[153],"configurations":[156],"performance.":[158],"median":[160],"speedup":[161,171],"about":[163,173],"1.05":[164],"\u00d7":[165,175],"achieved,":[167],"whereas":[168],"maximum":[170],"1.6":[174],"\u2022":[177,199,214],"comprehensive":[179],"CSR":[186],"A64FX,":[190],"including":[191,229],"feature.":[198],"locality":[201],"distance":[206],"matrices":[209],"their":[211],"sparsity":[212],"pattern.":[213],"miss":[217],"codes":[221],"multicore":[223],"architectures":[224],"multiple":[226],"shared":[227]},"counts_by_year":[],"updated_date":"2026-02-22T13:39:03.778224","created_date":"2025-12-05T00:00:00"}
