{"id":"https://openalex.org/W7126053170","doi":"https://doi.org/10.1016/j.parco.2026.103183","title":"Microarchitectural comparison, in-core modeling, and memory hierarchy analysis of state-of-the-art CPUs: Grace, Sapphire Rapids, and Genoa","display_name":"Microarchitectural comparison, in-core modeling, and memory hierarchy analysis of state-of-the-art CPUs: Grace, Sapphire Rapids, and Genoa","publication_year":2026,"publication_date":"2026-01-29","ids":{"openalex":"https://openalex.org/W7126053170","doi":"https://doi.org/10.1016/j.parco.2026.103183"},"language":"en","primary_location":{"id":"doi:10.1016/j.parco.2026.103183","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2026.103183","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.parco.2026.103183","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031307529","display_name":"Jan Laukemann","orcid":"https://orcid.org/0000-0002-3776-9353"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jan Laukemann","raw_affiliation_strings":["Erlangen National High Performance Computing Center (NHR@FAU), Martensstr. 1, Erlangen, 91058, Germany","Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Department of Computer Science, Martensstr. 3, Erlangen, 91058, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen National High Performance Computing Center (NHR@FAU), Martensstr. 1, Erlangen, 91058, Germany","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Department of Computer Science, Martensstr. 3, Erlangen, 91058, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123567706","display_name":"Georg Hager","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Erlangen National High Performance Computing Center (NHR@FAU), Martensstr. 1, Erlangen, 91058, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen National High Performance Computing Center (NHR@FAU), Martensstr. 1, Erlangen, 91058, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070209050","display_name":"Gerhard Wellein","orcid":"https://orcid.org/0000-0001-7371-3026"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerhard Wellein","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Department of Computer Science, Martensstr. 3, Erlangen, 91058, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Department of Computer Science, Martensstr. 3, Erlangen, 91058, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031307529"],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":{"value":2680,"currency":"USD","value_usd":2680},"apc_paid":{"value":2680,"currency":"USD","value_usd":2680},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.50046948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"127","issue":null,"first_page":"103183","last_page":"103183"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.012400000356137753,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.006099999882280827,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.8076000213623047},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.710099995136261},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.5688999891281128},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.44769999384880066},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.4320000112056732},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.423799991607666},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3815000057220459}],"concepts":[{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.8076000213623047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7936999797821045},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.710099995136261},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5817000269889832},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.5688999891281128},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.44769999384880066},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.4320000112056732},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.423799991607666},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3815000057220459},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.3725000023841858},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.33149999380111694},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.29750001430511475},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.28279998898506165},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.27869999408721924},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2768999934196472},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.26460000872612},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C187123476","wikidata":"https://www.wikidata.org/wiki/Q1197550","display_name":"Computer performance","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.parco.2026.103183","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2026.103183","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.parco.2026.103183","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.parco.2026.103183","pdf_url":null,"source":{"id":"https://openalex.org/S112708030","display_name":"Parallel Computing","issn_l":"0167-8191","issn":["0167-8191","1872-7336"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W2002555321","https://openalex.org/W3190673640"],"related_works":[],"abstract_inverted_index":{"Three":[0],"big":[1],"semiconductor":[2],"companies":[3],"in":[4,9,95,167,195,211],"HPC":[5],"are":[6,21],"currently":[7],"competing":[8],"the":[10,13,30,48,54,72,88,104,116,127,136,146,157,180,189,193,198],"race":[11],"for":[12,69],"best":[14],"CPU:":[15],"AMD,":[16],"Intel,":[17],"and":[18,39,59,80,209],"NVIDIA.":[19],"There":[20],"significant":[22],"differences":[23,94],"among":[24],"their":[25],"state-of-the-art":[26],"CPU":[27,160],"designs,":[28],"spanning":[29],"entire":[31],"range":[32],"from":[33],"instruction":[34],"execution":[35],"to":[36,173,213],"cache":[37,190,205],"behavior":[38,114],"main":[40],"memory":[41,117,137],"bandwidth.":[42],"In":[43],"this":[44,91],"work,":[45],"we":[46,107,187],"analyze":[47],"performance":[49,67,201],"of":[50,122,153,183,192,197],"CPUs":[51,194],"based":[52],"on":[53,176,207],"Zen":[55],"4,":[56],"Golden":[57],"Cove,":[58],"Neoverse":[60],"V2":[61],"microarchitectures.":[62],"We":[63,124,143],"create":[64],"accurate":[65],"in-core":[66,96],"models":[68],"use":[70,182],"with":[71,85],"Open":[73],"Source":[74],"Architecture":[75],"Code":[76],"Analyzer":[77],"(OSACA)":[78],"tool":[79,89],"compare":[81],"its":[82],"prediction":[83],"accuracy":[84],"llvm-mca.":[86],"Beyond":[87,103],"aspect,":[90],"reveals":[92],"interesting":[93],"design":[97],"points":[98],"but":[99],"also":[100],"some":[101],"commonalities.":[102],"single":[105],"core,":[106],"extend":[108],"our":[109],"comparison":[110],"by":[111,140],"measuring":[112],"data-transfer":[113],"through":[115],"hierarchy":[118,191],"using":[119],"a":[120,150],"variety":[121],"microbenchmarks.":[123],"thoroughly":[125],"investigate":[126],"\u201cwrite-allocate":[128],"(WA)":[129],"evasion\u201d":[130],"feature,":[131],"which":[132],"can":[133,161],"automatically":[134],"reduce":[135],"traffic":[138],"caused":[139],"write":[141,163],"misses.":[142],"show":[144],"that":[145],"Grace":[147,210],"Superchip":[148],"has":[149],"next-to-optimal":[151],"implementation":[152],"WA":[154],"evasion":[155],"while":[156],"Sapphire":[158,214],"Rapids":[159],"avoid":[162],"allocates":[164],"completely":[165],"only":[166,171],"specific":[168],"scenarios.":[169],"The":[170],"way":[172],"eliminate":[174],"WAs":[175],"AMD":[177],"Genoa":[178,208],"is":[179],"explicit":[181],"non-temporal":[184],"stores.":[185],"Finally,":[186],"study":[188],"view":[196],"Execution-Cache-Memory":[199],"(ECM)":[200],"model,":[202],"revealing":[203],"overlapping":[204],"hierarchies":[206],"contrast":[212],"Rapids.":[215]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-30T00:00:00"}
