{"id":"https://openalex.org/W2038666141","doi":"https://doi.org/10.1109/hpca.2015.7056063","title":"GPGPU performance and power estimation using machine learning","display_name":"GPGPU performance and power estimation using machine learning","publication_year":2015,"publication_date":"2015-02-01","ids":{"openalex":"https://openalex.org/W2038666141","doi":"https://doi.org/10.1109/hpca.2015.7056063","mag":"2038666141"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2015.7056063","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2015.7056063","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058047421","display_name":"Gene Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gene Wu","raw_affiliation_strings":["Electrical and Computer Engineering The University of Texas at Austin","Electrical and Computer Engineering, The University of Texas at Austin"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Electrical and Computer Engineering, The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034110574","display_name":"Joseph L. Greathouse","orcid":"https://orcid.org/0000-0003-0181-5511"},"institutions":[{"id":"https://openalex.org/I1311921367","display_name":"Advanced Micro Devices (Canada)","ror":"https://ror.org/02yh0k313","country_code":"CA","type":"company","lineage":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"]},{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Joseph L. Greathouse","raw_affiliation_strings":["AMD Research Advanced Micro Devices, Inc","AMD Research, Advanced Micro Devices, Inc.#TAB#"],"affiliations":[{"raw_affiliation_string":"AMD Research Advanced Micro Devices, Inc","institution_ids":["https://openalex.org/I4210137977","https://openalex.org/I1311921367"]},{"raw_affiliation_string":"AMD Research, Advanced Micro Devices, Inc.#TAB#","institution_ids":["https://openalex.org/I1311921367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064499118","display_name":"Alexander Lyashevsky","orcid":null},"institutions":[{"id":"https://openalex.org/I1311921367","display_name":"Advanced Micro Devices (Canada)","ror":"https://ror.org/02yh0k313","country_code":"CA","type":"company","lineage":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"]},{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Alexander Lyashevsky","raw_affiliation_strings":["AMD Research Advanced Micro Devices, Inc","AMD Research, Advanced Micro Devices, Inc.#TAB#"],"affiliations":[{"raw_affiliation_string":"AMD Research Advanced Micro Devices, Inc","institution_ids":["https://openalex.org/I4210137977","https://openalex.org/I1311921367"]},{"raw_affiliation_string":"AMD Research, Advanced Micro Devices, Inc.#TAB#","institution_ids":["https://openalex.org/I1311921367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060736283","display_name":"Nuwan Jayasena","orcid":"https://orcid.org/0009-0005-2973-9479"},"institutions":[{"id":"https://openalex.org/I1311921367","display_name":"Advanced Micro Devices (Canada)","ror":"https://ror.org/02yh0k313","country_code":"CA","type":"company","lineage":["https://openalex.org/I1311921367","https://openalex.org/I4210137977"]},{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Nuwan Jayasena","raw_affiliation_strings":["AMD Research Advanced Micro Devices, Inc","AMD Research, Advanced Micro Devices, Inc.#TAB#"],"affiliations":[{"raw_affiliation_string":"AMD Research Advanced Micro Devices, Inc","institution_ids":["https://openalex.org/I4210137977","https://openalex.org/I1311921367"]},{"raw_affiliation_string":"AMD Research, Advanced Micro Devices, Inc.#TAB#","institution_ids":["https://openalex.org/I1311921367"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030464643","display_name":"Derek Chiou","orcid":"https://orcid.org/0009-0008-6762-4527"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Derek Chiou","raw_affiliation_strings":["Electrical and Computer Engineering The University of Texas at Austin","Electrical and Computer Engineering, The University of Texas at Austin"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Electrical and Computer Engineering, The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5058047421"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":23.5777,"has_fulltext":false,"cited_by_count":210,"citation_normalized_percentile":{"value":0.9974768,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"564","last_page":"576"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8121405839920044},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.7219597101211548},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5869633555412292},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5857878923416138},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5681086182594299},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5511252880096436},{"id":"https://openalex.org/keywords/frequency-scaling","display_name":"Frequency scaling","score":0.5251799821853638},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.47522714734077454},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.45840907096862793},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.45654770731925964},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4151410460472107},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.4131982624530792},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.41017335653305054},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3629586100578308},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.1245393455028534}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8121405839920044},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.7219597101211548},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5869633555412292},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5857878923416138},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5681086182594299},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5511252880096436},{"id":"https://openalex.org/C157742956","wikidata":"https://www.wikidata.org/wiki/Q3237776","display_name":"Frequency scaling","level":3,"score":0.5251799821853638},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.47522714734077454},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.45840907096862793},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.45654770731925964},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4151410460472107},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4131982624530792},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.41017335653305054},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3629586100578308},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.1245393455028534},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca.2015.7056063","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2015.7056063","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1968432902","https://openalex.org/W1975237352","https://openalex.org/W1979527452","https://openalex.org/W1985938075","https://openalex.org/W1986491730","https://openalex.org/W1987823408","https://openalex.org/W1993508117","https://openalex.org/W2001556242","https://openalex.org/W2028800864","https://openalex.org/W2029822446","https://openalex.org/W2032512814","https://openalex.org/W2033597569","https://openalex.org/W2036850008","https://openalex.org/W2039467384","https://openalex.org/W2048441570","https://openalex.org/W2059634925","https://openalex.org/W2063884320","https://openalex.org/W2070544163","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2088866486","https://openalex.org/W2093043622","https://openalex.org/W2103742924","https://openalex.org/W2112029501","https://openalex.org/W2113282196","https://openalex.org/W2118127549","https://openalex.org/W2122297544","https://openalex.org/W2130336316","https://openalex.org/W2130496984","https://openalex.org/W2131667412","https://openalex.org/W2132219981","https://openalex.org/W2142769604","https://openalex.org/W2149234156","https://openalex.org/W2151195685","https://openalex.org/W2152513418","https://openalex.org/W2153456949","https://openalex.org/W2153667821","https://openalex.org/W2157070686","https://openalex.org/W2158924248","https://openalex.org/W2161381500","https://openalex.org/W2161537396","https://openalex.org/W2167334577","https://openalex.org/W2169559170","https://openalex.org/W2169880332","https://openalex.org/W2183153107","https://openalex.org/W2273440736","https://openalex.org/W2468944713","https://openalex.org/W2475077126","https://openalex.org/W2541326062","https://openalex.org/W2739131812","https://openalex.org/W3136996852","https://openalex.org/W3137551601","https://openalex.org/W4206890074","https://openalex.org/W4246439759","https://openalex.org/W4285719527","https://openalex.org/W6668124466","https://openalex.org/W6679660563","https://openalex.org/W6682263260","https://openalex.org/W6683816520","https://openalex.org/W6694513646","https://openalex.org/W6720652222","https://openalex.org/W6742135662","https://openalex.org/W6807618959"],"related_works":["https://openalex.org/W2154351074","https://openalex.org/W2151223307","https://openalex.org/W2151046618","https://openalex.org/W1972148443","https://openalex.org/W1969233021","https://openalex.org/W2023400509","https://openalex.org/W2332054630","https://openalex.org/W2167646277","https://openalex.org/W2063573318","https://openalex.org/W2590100594"],"abstract_inverted_index":{"Graphics":[0],"Processing":[1],"Units":[2],"(GPUs)":[3],"have":[4],"numerous":[5,79],"configuration":[6,99],"and":[7,19,38,51,87,152,177,187,195],"design":[8,28],"options,":[9],"including":[10],"core":[11,175],"frequency,":[12],"number":[13,168],"of":[14,26,73,154,166,169,174,181,197,207],"parallel":[15],"compute":[16],"units":[17],"(CUs),":[18],"available":[20],"memory":[21,182],"bandwidth.":[22],"At":[23],"many":[24],"stages":[25],"the":[27,84,90,97,135,150,155,167,205,225],"process,":[29],"it":[30],"is":[31,68,100,145,202],"important":[32],"to":[33,148,192,204],"estimate":[34,149],"how":[35,93],"application":[36,113,157],"performance":[37,50,86,103,151,186],"power":[39,52,88,153,188],"are":[40,76,106,123,190],"impacted":[41],"by":[42],"these":[43],"options.":[44],"This":[45,142,201],"paper":[46],"describes":[47],"a":[48,71,111,115,126,171,178],"GPU":[49,64,117,160],"estimation":[53],"model":[54,67,91,217],"that":[55,75,129],"uses":[56],"machine":[57],"learning":[58],"techniques":[59],"on":[60,70,114,229],"measurements":[61],"from":[62,134],"real":[63,198,230],"hardware.":[65,231],"The":[66],"trained":[69],"collection":[72],"applications":[74,94],"run":[77],"at":[78,158],"different":[80,159],"hardware":[81],"configurations.":[82,161],"From":[83],"measured":[85],"data,":[89],"learns":[92],"scale":[95],"as":[96,219],"GPU's":[98],"changed.":[101],"Hardware":[102],"counter":[104,121],"values":[105,122],"then":[107,146],"gathered":[108],"when":[109],"running":[110,227],"new":[112,156],"single":[116],"configuration.":[118],"These":[119],"dynamic":[120],"fed":[124],"into":[125],"neural":[127],"network":[128],"predicts":[130],"which":[131],"scaling":[132,143],"curve":[133,144],"training":[136,214],"data":[137],"best":[138],"represents":[139],"this":[140],"kernel.":[141],"used":[147],"Over":[162],"an":[163,212],"8\u00d7":[164],"range":[165,173,180],"CUs,":[170],"3.3\u00d7":[172],"frequencies,":[176],"2.9\u00d7":[179],"bandwidth,":[183],"our":[184,216],"model's":[185],"estimates":[189],"accurate":[191],"within":[193],"15%":[194],"10%":[196],"hardware,":[199],"respectively.":[200],"comparable":[203],"accuracy":[206],"cycle-level":[208],"simulators.":[209],"However,":[210],"after":[211],"initial":[213],"phase,":[215],"runs":[218],"fast":[220],"as,":[221],"or":[222],"faster":[223],"than":[224],"program":[226],"natively":[228]},"counts_by_year":[{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":19},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":27},{"year":2019,"cited_by_count":33},{"year":2018,"cited_by_count":22},{"year":2017,"cited_by_count":30},{"year":2016,"cited_by_count":18},{"year":2015,"cited_by_count":3},{"year":2013,"cited_by_count":1}],"updated_date":"2026-02-07T06:11:34.122080","created_date":"2025-10-10T00:00:00"}
