{"id":"https://openalex.org/W2901115656","doi":"https://doi.org/10.1109/tpds.2018.2883056","title":"Modeling Non-Uniform Memory Access on Large Compute Nodes with the Cache-Aware Roofline Model","display_name":"Modeling Non-Uniform Memory Access on Large Compute Nodes with the Cache-Aware Roofline Model","publication_year":2018,"publication_date":"2018-11-23","ids":{"openalex":"https://openalex.org/W2901115656","doi":"https://doi.org/10.1109/tpds.2018.2883056","mag":"2901115656"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2018.2883056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2883056","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-01924951","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017891047","display_name":"Nicolas Denoyelle","orcid":"https://orcid.org/0000-0001-8282-9653"},"institutions":[{"id":"https://openalex.org/I4210131512","display_name":"Centre Inria de l'universit\u00e9 de Bordeaux","ror":"https://ror.org/03tjcj052","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210131512"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Nicolas Denoyelle","raw_affiliation_strings":["Inria \u2013 Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)"],"affiliations":[{"raw_affiliation_string":"Inria \u2013 Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","institution_ids":["https://openalex.org/I4210131512"]},{"raw_affiliation_string":"TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008381628","display_name":"Brice Goglin","orcid":"https://orcid.org/0000-0002-8671-4615"},"institutions":[{"id":"https://openalex.org/I4210131512","display_name":"Centre Inria de l'universit\u00e9 de Bordeaux","ror":"https://ror.org/03tjcj052","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210131512"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Brice Goglin","raw_affiliation_strings":["Inria \u2013 Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)"],"affiliations":[{"raw_affiliation_string":"Inria \u2013 Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","institution_ids":["https://openalex.org/I4210131512"]},{"raw_affiliation_string":"TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024686169","display_name":"Aleksandar Ili\u0107","orcid":"https://orcid.org/0000-0002-8594-3539"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I4210089667","display_name":"TARH (Portugal)","ror":"https://ror.org/008yah456","country_code":"PT","type":"company","lineage":["https://openalex.org/I4210089667"]},{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Aleksandar Ilic","raw_affiliation_strings":["INESC-ID, Universidade de Lisboa, Instituto Superior T&#x00E9;cnico, Lisbon, Portugal","IST - Instituto Superior T\u00e9cnico, Universidade T\u00e9cnica de Lisboa (Campus Tecnol\u00f3gico e Nuclear\r\nE.N. 10, 2686-953 Sacav\u00e9m - Portugal)"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Universidade de Lisboa, Instituto Superior T&#x00E9;cnico, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"IST - Instituto Superior T\u00e9cnico, Universidade T\u00e9cnica de Lisboa (Campus Tecnol\u00f3gico e Nuclear\r\nE.N. 10, 2686-953 Sacav\u00e9m - Portugal)","institution_ids":["https://openalex.org/I4210089667","https://openalex.org/I4387152517"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005797486","display_name":"Emmanuel Jeannot","orcid":"https://orcid.org/0000-0002-3956-2997"},"institutions":[{"id":"https://openalex.org/I4210131512","display_name":"Centre Inria de l'universit\u00e9 de Bordeaux","ror":"https://ror.org/03tjcj052","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210131512"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Jeannot","raw_affiliation_strings":["Inria &#x2013; Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)"],"affiliations":[{"raw_affiliation_string":"Inria &#x2013; Bordeaux - Sud-Ouest, University Bordeaux, Bordeaux, France","institution_ids":["https://openalex.org/I4210131512"]},{"raw_affiliation_string":"TADAAM - Topology-Aware System-Scale Data Management for High-Performance Computing (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077537777","display_name":"Leonel Sousa","orcid":"https://orcid.org/0000-0002-8066-221X"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I4210089667","display_name":"TARH (Portugal)","ror":"https://ror.org/008yah456","country_code":"PT","type":"company","lineage":["https://openalex.org/I4210089667"]},{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Leonel Sousa","raw_affiliation_strings":["INESC-ID, Universidade de Lisboa, Instituto Superior T&#x00E9;cnico, Lisbon, Portugal","IST - Instituto Superior T\u00e9cnico, Universidade T\u00e9cnica de Lisboa (Campus Tecnol\u00f3gico e Nuclear\r\nE.N. 10, 2686-953 Sacav\u00e9m - Portugal)"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Universidade de Lisboa, Instituto Superior T&#x00E9;cnico, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"IST - Instituto Superior T\u00e9cnico, Universidade T\u00e9cnica de Lisboa (Campus Tecnol\u00f3gico e Nuclear\r\nE.N. 10, 2686-953 Sacav\u00e9m - Portugal)","institution_ids":["https://openalex.org/I4210089667","https://openalex.org/I4387152517"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5017891047"],"corresponding_institution_ids":["https://openalex.org/I4210131512"],"apc_list":null,"apc_paid":null,"fwci":2.1057,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.87486929,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"30","issue":"6","first_page":"1374","last_page":"1389"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8441927433013916},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5891236662864685},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5891109704971313},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5568545460700989},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.5236525535583496},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.506273627281189},{"id":"https://openalex.org/keywords/instruction-prefetch","display_name":"Instruction prefetch","score":0.42165112495422363},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.42107030749320984},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3956122398376465},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3776634931564331},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3269268870353699},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.2355298399925232},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.19598209857940674},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.19448238611221313}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8441927433013916},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5891236662864685},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5891109704971313},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5568545460700989},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.5236525535583496},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.506273627281189},{"id":"https://openalex.org/C133588205","wikidata":"https://www.wikidata.org/wiki/Q28455645","display_name":"Instruction prefetch","level":3,"score":0.42165112495422363},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.42107030749320984},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3956122398376465},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3776634931564331},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3269268870353699},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.2355298399925232},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.19598209857940674},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.19448238611221313},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2018.2883056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2883056","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-01924951v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01924951","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems, 2019, 30 (6), pp.1374--1389. &#x27E8;10.1109/TPDS.2018.2883056&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01924951v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01924951","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems, 2019, 30 (6), pp.1374--1389. &#x27E8;10.1109/TPDS.2018.2883056&#x27E9;","raw_type":"Journal articles"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.44999998807907104,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4455644330","display_name":null,"funder_award_id":"IC1305","funder_id":"https://openalex.org/F4320320366","funder_display_name":"European Cooperation in Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320320366","display_name":"European Cooperation in Science and Technology","ror":"https://ror.org/01bstzn19"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1086484114","https://openalex.org/W1529554145","https://openalex.org/W1969923711","https://openalex.org/W2002555321","https://openalex.org/W2013626513","https://openalex.org/W2056639008","https://openalex.org/W2120881863","https://openalex.org/W2154865023","https://openalex.org/W2169665207","https://openalex.org/W2429898963","https://openalex.org/W2473693139","https://openalex.org/W2529487057","https://openalex.org/W2609137082","https://openalex.org/W2754695535","https://openalex.org/W2776241935","https://openalex.org/W4298168968","https://openalex.org/W6627141550","https://openalex.org/W6631899907","https://openalex.org/W6684730004","https://openalex.org/W6689182977","https://openalex.org/W6721034629","https://openalex.org/W6728321799","https://openalex.org/W6743950553","https://openalex.org/W6747315645"],"related_works":["https://openalex.org/W1994438830","https://openalex.org/W2153372734","https://openalex.org/W4249808101","https://openalex.org/W2142110652","https://openalex.org/W1522128341","https://openalex.org/W2183881829","https://openalex.org/W2350803493","https://openalex.org/W2101762019","https://openalex.org/W15724499","https://openalex.org/W1994858553"],"abstract_inverted_index":{"NUMA":[0,102],"platforms,":[1],"emerging":[2],"memory":[3,23,26,37,45,130],"architectures":[4],"with":[5,53,67,145,156],"on-package":[6],"high":[7],"bandwidth":[8,131,140],"memories":[9],"bring":[10],"new":[11,116,134],"opportunities":[12],"and":[13,22,108,124],"challenges":[14],"to":[15,49,69,84,100],"bridge":[16],"the":[17,34,54,70,78,98,115,122,138,157],"gap":[18],"between":[19],"computing":[20,59],"power":[21],"performance.":[24,61],"Heterogeneous":[25],"machines":[27],"feature":[28],"several":[29],"performance":[30,46,66,72,86,155],"trade-offs,":[31],"depending":[32],"on":[33,166],"kind":[35],"of":[36,57,97,113,118,126],"used,":[38],"when":[39,152],"writing":[40],"or":[41],"reading":[42],"it.":[43],"Finding":[44],"upper-bounds":[47],"subject":[48],"such":[50,105],"trade-offs":[51,144],"aligns":[52],"numerous":[55],"interests":[56],"measuring":[58],"system":[60],"In":[62,88],"particular,":[63],"representing":[64],"applications":[65,154],"respect":[68],"platform":[71],"bounds":[73],"has":[74],"been":[75],"addressed":[76],"in":[77],"state-of-the-art":[79],"Cache-Aware":[80],"Roofline":[81],"Model":[82],"(CARM)":[83],"troubleshoot":[85],"issues.":[87],"this":[89,119],"paper,":[90],"we":[91],"present":[92],"a":[93,127],"Locality-Aware":[94],"extension":[95],"(LARM)":[96],"CARM":[99],"model":[101,136],"platforms":[103],"bottlenecks,":[104],"as":[106],"contention":[107],"remote":[109],"access.":[110],"On":[111],"top":[112],"this,":[114],"contribution":[117],"paper":[120],"is":[121],"design":[123],"validation":[125],"novel":[128],"hybrid":[129,135],"model.":[132],"This":[133],"quantifies":[137],"achievable":[139],"upper-bound":[141],"under":[142],"above-described":[143],"less":[146],"than":[147],"3":[148],"percent":[149],"error.":[150],"Hence,":[151],"comparing":[153],"maximum":[158],"attainable":[159],"performance,":[160],"software":[161],"designers":[162],"can":[163],"now":[164],"rely":[165],"more":[167],"accurate":[168],"information.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
