{"id":"https://openalex.org/W2891241286","doi":"https://doi.org/10.1109/pmbs.2018.8641578","title":"Automated Instruction Stream Throughput Prediction for Intel and AMD Microarchitectures","display_name":"Automated Instruction Stream Throughput Prediction for Intel and AMD Microarchitectures","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2891241286","doi":"https://doi.org/10.1109/pmbs.2018.8641578","mag":"2891241286"},"language":"en","primary_location":{"id":"doi:10.1109/pmbs.2018.8641578","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pmbs.2018.8641578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1809.00912","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jan Laukemann","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jan Laukemann","raw_affiliation_strings":["Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Julian Hammer","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Julian Hammer","raw_affiliation_strings":["Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Johannes Hofmann","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Johannes Hofmann","raw_affiliation_strings":["Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Georg Hager","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":null,"display_name":"Gerhard Wellein","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerhard Wellein","raw_affiliation_strings":["Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich- Alexander- Universit\u00e4t Erlangen- N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":2.8947,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.91330622,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"121","last_page":"131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7660999894142151},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4975000023841858},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.48179998993873596},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.48069998621940613},{"id":"https://openalex.org/keywords/semaphore","display_name":"Semaphore","score":0.42809998989105225},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.39079999923706055},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.3847000002861023},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.382999986410141},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.382999986410141},{"id":"https://openalex.org/keywords/processor-register","display_name":"Processor register","score":0.33980000019073486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8705999851226807},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7660999894142151},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5218999981880188},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4975000023841858},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.48179998993873596},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.48069998621940613},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4366999864578247},{"id":"https://openalex.org/C95203288","wikidata":"https://www.wikidata.org/wiki/Q221682","display_name":"Semaphore","level":2,"score":0.42809998989105225},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.39079999923706055},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3847000002861023},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.38100001215934753},{"id":"https://openalex.org/C2871975","wikidata":"https://www.wikidata.org/wiki/Q187466","display_name":"Processor register","level":4,"score":0.33980000019073486},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33959999680519104},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.337799996137619},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.33629998564720154},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C73564150","wikidata":"https://www.wikidata.org/wiki/Q11417093","display_name":"Instruction scheduling","level":5,"score":0.3068000078201294},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.3037000000476837},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C50831359","wikidata":"https://www.wikidata.org/wiki/Q165436","display_name":"Assembly language","level":3,"score":0.28380000591278076},{"id":"https://openalex.org/C168522837","wikidata":"https://www.wikidata.org/wiki/Q679552","display_name":"Branch predictor","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.27709999680519104},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.26930001378059387},{"id":"https://openalex.org/C1793878","wikidata":"https://www.wikidata.org/wiki/Q1153762","display_name":"Out-of-order execution","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C115168132","wikidata":"https://www.wikidata.org/wiki/Q55813","display_name":"Machine code","level":3,"score":0.26510000228881836},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C141331961","wikidata":"https://www.wikidata.org/wiki/Q2164465","display_name":"Speculative execution","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.2572000026702881},{"id":"https://openalex.org/C50805821","wikidata":"https://www.wikidata.org/wiki/Q1136670","display_name":"Titan (rocket family)","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/pmbs.2018.8641578","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pmbs.2018.8641578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1809.00912","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1809.00912","pdf_url":"https://arxiv.org/pdf/1809.00912","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1809.00912","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1809.00912","pdf_url":"https://arxiv.org/pdf/1809.00912","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W842872497","https://openalex.org/W1517652255","https://openalex.org/W2078636996","https://openalex.org/W2113235308","https://openalex.org/W2128523353","https://openalex.org/W2147657366","https://openalex.org/W2588108287","https://openalex.org/W6659444031","https://openalex.org/W6753910507"],"related_works":[],"abstract_inverted_index":{"An":[0],"accurate":[1],"prediction":[2],"of":[3,7,20,34,52,80,89,102],"scheduling":[4],"and":[5,42,47,59,94,110,113,122,138],"execution":[6,78],"instruction":[8],"streams":[9],"is":[10],"a":[11,49,71,104],"necessary":[12],"prerequisite":[13],"for":[14,75,117],"predicting":[15,76],"the":[16,40,43,53,64,77,87,100,118,128,152],"in-core":[17],"performance":[18,36],"behavior":[19],"throughput-bound":[21],"loop":[22,60],"kernels":[23,137],"on":[24,150],"out-of-order":[25,96],"processor":[26],"architectures.":[27,159],"Such":[28],"predictions":[29,141],"are":[30],"an":[31,90,148],"indispensable":[32],"component":[33],"analytical":[35],"models,":[37,130],"such":[38],"as":[39],"Roofline":[41],"Execution-Cache-Memory":[44],"(ECM)":[45],"model,":[46],"allow":[48],"deep":[50],"understanding":[51],"performance-relevant":[54],"interactions":[55],"between":[56],"hardware":[57],"architecture":[58],"code.":[61],"We":[62,98],"present":[63],"Open":[65],"Source":[66],"Architecture":[67],"Code":[68],"Analyzer":[69],"(OSACA),":[70],"static":[72],"analysis":[73],"tool":[74],"time":[79],"sequential":[81],"loops":[82],"comprising":[83],"x86":[84],"instructions":[85],"under":[86],"assumption":[88],"infinite":[91],"first-level":[92],"cache":[93],"perfect":[95],"scheduling.":[97],"show":[99],"process":[101],"building":[103],"machine":[105],"model":[106],"from":[107],"available":[108],"documentation":[109],"semi-automatic":[111],"benchmarking,":[112],"carry":[114],"it":[115],"out":[116],"latest":[119],"Intel":[120],"Skylake":[121],"AMD":[123],"Zen":[124],"micro-architectures.":[125],"To":[126],"validate":[127],"constructed":[129],"we":[131,146],"apply":[132],"them":[133],"to":[134,157],"several":[135],"assembly":[136],"compare":[139],"runtime":[140],"with":[142],"actual":[143],"measurements.":[144],"Finally":[145],"give":[147],"outlook":[149],"how":[151],"method":[153],"may":[154],"be":[155],"generalized":[156],"new":[158]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2018-09-27T00:00:00"}
