{"id":"https://openalex.org/W3205833606","doi":"https://doi.org/10.1145/3505285","title":"Metrics and Design of an Instruction Roofline Model for AMD GPUs","display_name":"Metrics and Design of an Instruction Roofline Model for AMD GPUs","publication_year":2022,"publication_date":"2022-01-31","ids":{"openalex":"https://openalex.org/W3205833606","doi":"https://doi.org/10.1145/3505285","mag":"3205833606"},"language":"en","primary_location":{"id":"doi:10.1145/3505285","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3505285","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079075987","display_name":"Matthew Leinhauser","orcid":"https://orcid.org/0000-0003-2914-1483"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Matthew Leinhauser","raw_affiliation_strings":["Center for Advanced Systems Understanding, and University of Delaware, Newark, Delaware, USA"],"raw_orcid":"https://orcid.org/0000-0003-2914-1483","affiliations":[{"raw_affiliation_string":"Center for Advanced Systems Understanding, and University of Delaware, Newark, Delaware, USA","institution_ids":["https://openalex.org/I86501945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059134730","display_name":"Ren\u00e9 Widera","orcid":"https://orcid.org/0000-0003-1642-0459"},"institutions":[{"id":"https://openalex.org/I2801798921","display_name":"Helmholtz-Zentrum Dresden-Rossendorf","ror":"https://ror.org/01zy2cs03","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2801798921"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ren\u00e9 Widera","raw_affiliation_strings":["Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany"],"raw_orcid":"https://orcid.org/0000-0003-1642-0459","affiliations":[{"raw_affiliation_string":"Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany","institution_ids":["https://openalex.org/I2801798921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067389281","display_name":"Sergei Bastrakov","orcid":"https://orcid.org/0000-0003-3396-6154"},"institutions":[{"id":"https://openalex.org/I2801798921","display_name":"Helmholtz-Zentrum Dresden-Rossendorf","ror":"https://ror.org/01zy2cs03","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2801798921"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sergei Bastrakov","raw_affiliation_strings":["Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany"],"raw_orcid":"https://orcid.org/0000-0003-3396-6154","affiliations":[{"raw_affiliation_string":"Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany","institution_ids":["https://openalex.org/I2801798921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007216468","display_name":"Alexander Debus","orcid":"https://orcid.org/0000-0002-3844-3697"},"institutions":[{"id":"https://openalex.org/I2801798921","display_name":"Helmholtz-Zentrum Dresden-Rossendorf","ror":"https://ror.org/01zy2cs03","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2801798921"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Debus","raw_affiliation_strings":["Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3844-3697","affiliations":[{"raw_affiliation_string":"Helmholtz-Zentrum Dresden-Rossendorf Laboratory, Dresden, Germany","institution_ids":["https://openalex.org/I2801798921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025005466","display_name":"Michael Bu\u00dfmann","orcid":"https://orcid.org/0000-0002-8258-3881"},"institutions":[{"id":"https://openalex.org/I2801798921","display_name":"Helmholtz-Zentrum Dresden-Rossendorf","ror":"https://ror.org/01zy2cs03","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2801798921"]},{"id":"https://openalex.org/I4210133756","display_name":"Center for Advanced Systems Understanding","ror":"https://ror.org/042b69396","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2801798921","https://openalex.org/I4210133756"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael Bussmann","raw_affiliation_strings":["Center for Advanced Systems Understanding, and Helmholtz-ZentrumDresden-Rossendorf Laboratory, Dresden, Germany"],"raw_orcid":"https://orcid.org/0000-0002-8258-3881","affiliations":[{"raw_affiliation_string":"Center for Advanced Systems Understanding, and Helmholtz-ZentrumDresden-Rossendorf Laboratory, Dresden, Germany","institution_ids":["https://openalex.org/I4210133756","https://openalex.org/I2801798921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009614578","display_name":"Sunita Chandrasekaran","orcid":"https://orcid.org/0000-0002-3560-9428"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sunita Chandrasekaran","raw_affiliation_strings":["University of Delaware, Newark, Delaware, USA"],"raw_orcid":"https://orcid.org/0000-0002-3560-9428","affiliations":[{"raw_affiliation_string":"University of Delaware, Newark, Delaware, USA","institution_ids":["https://openalex.org/I86501945"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5079075987"],"corresponding_institution_ids":["https://openalex.org/I86501945"],"apc_list":null,"apc_paid":null,"fwci":3.2641,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.92107623,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"9","issue":"1","first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8548479676246643},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6677488088607788},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5255067348480225},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.510071337223053},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4850403666496277},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4672401249408722},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.45227131247520447},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4485887885093689},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4476467967033386},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2049679458141327},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.07154977321624756}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8548479676246643},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6677488088607788},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5255067348480225},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.510071337223053},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4850403666496277},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4672401249408722},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.45227131247520447},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4485887885093689},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4476467967033386},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2049679458141327},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.07154977321624756},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3505285","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3505285","pdf_url":null,"source":{"id":"https://openalex.org/S2483380313","display_name":"ACM Transactions on Parallel Computing","issn_l":"2329-4949","issn":["2329-4949","2329-4957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Parallel Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.5}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2002555321","https://openalex.org/W2075897759","https://openalex.org/W2154865023","https://openalex.org/W2311803642","https://openalex.org/W2414288867","https://openalex.org/W2493723099","https://openalex.org/W2528745529","https://openalex.org/W2731996856","https://openalex.org/W2972774555","https://openalex.org/W2983865192","https://openalex.org/W3015498000","https://openalex.org/W3099681961","https://openalex.org/W3100444102","https://openalex.org/W3124474054","https://openalex.org/W3153525491","https://openalex.org/W4285478533"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825"],"abstract_inverted_index":{"Due":[0,35],"to":[1,15,36,53,86,166],"the":[2,6,29,37,126,141,154,167,191,195,200],"recent":[3],"announcement":[4],"of":[5,40,143,146,177,199],"Frontier":[7],"supercomputer,":[8],"many":[9],"scientific":[10,110],"application":[11,55,118],"developers":[12],"are":[13],"working":[14],"make":[16,174],"their":[17],"applications":[18],"compatible":[19],"with":[20],"AMD":[21,44,58,70,98,129,134,155,192],"(CPU-GPU)":[22],"architectures,":[23],"which":[24],"means":[25],"moving":[26],"away":[27],"from":[28],"traditional":[30],"CPU":[31],"and":[32,75,93,122,133,188],"NVIDIA-GPU":[33],"systems.":[34],"current":[38],"limitations":[39],"profiling":[41,171],"tools":[42],"for":[43,69,106,120],"GPUs,":[45],"this":[46,61,205],"shift":[47],"leaves":[48],"a":[49,76,84,107,159],"void":[50],"in":[51,91,148,204],"how":[52],"measure":[54,87],"performance":[56,90,142,176,197],"on":[57,96,125],"GPUs.":[59,137],"In":[60],"article,":[62],"we":[63,101,150],"design":[64],"an":[65,88,113],"instruction":[66,103,189],"roofline":[67,104],"model":[68],"GPUs":[71,202],"using":[72],"AMD\u2019s":[73],"ROCProfiler":[74],"benchmarking":[77],"tool,":[78],"BabelStream":[79],"(the":[80],"HIP":[81],"implementation),":[82],"as":[83],"way":[85],"application\u2019s":[89],"instructions":[92],"memory":[94],"transactions":[95],"new":[97],"hardware.":[99],"Specifically,":[100],"create":[102],"models":[105],"case":[108],"study":[109],"application,":[111],"PIConGPU,":[112],"open":[114],"source":[115],"particle-in-cell":[116],"simulations":[117],"used":[119,203],"plasma":[121],"laser-plasma":[123],"physics":[124],"NVIDIA":[127,168],"V100,":[128],"Radeon":[130],"Instinct":[131,135],"MI60,":[132],"MI100":[136,156],"When":[138,182],"looking":[139,183],"at":[140,184],"multiple":[144],"kernels":[145],"interest":[147],"PIConGPU":[149],"find":[151],"that":[152],"although":[153],"GPU":[157],"achieves":[158,194],"similar,":[160],"or":[161],"better,":[162],"execution":[163,185],"time":[164],"compared":[165],"V100":[169],"GPU,":[170],"tool":[172],"differences":[173],"comparing":[175],"these":[178],"two":[179],"architectures":[180],"hard.":[181],"time,":[186],"GIPS,":[187],"intensity,":[190],"MI60":[193],"worst":[196],"out":[198],"three":[201],"work.":[206]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
