{"id":"https://openalex.org/W3006451002","doi":"https://doi.org/10.1145/3387902.3392613","title":"Verified instruction-level energy consumption measurement for NVIDIA GPUs","display_name":"Verified instruction-level energy consumption measurement for NVIDIA GPUs","publication_year":2020,"publication_date":"2020-05-11","ids":{"openalex":"https://openalex.org/W3006451002","doi":"https://doi.org/10.1145/3387902.3392613","mag":"3006451002"},"language":"en","primary_location":{"id":"doi:10.1145/3387902.3392613","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3387902.3392613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Computing Frontiers","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2002.07795","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yehia Arafa","orcid":null},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yehia Arafa","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ammar ElWazir","orcid":null},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ammar ElWazir","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Abdelrahman ElKanishy","orcid":null},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdelrahman ElKanishy","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Youssef Aly","orcid":null},"institutions":[{"id":"https://openalex.org/I59272784","display_name":"Arab Academy for Science, Technology, and Maritime Transport","ror":"https://ror.org/0004vyj87","country_code":"EG","type":"education","lineage":["https://openalex.org/I59272784"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Youssef Aly","raw_affiliation_strings":["Arab Academy for Science, Technology &amp; Maritime Transport, Egypt"],"affiliations":[{"raw_affiliation_string":"Arab Academy for Science, Technology &amp; Maritime Transport, Egypt","institution_ids":["https://openalex.org/I59272784"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ayatelrahman Elsayed","orcid":null},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ayatelrahman Elsayed","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Abdel-Hameed Badawy","orcid":null},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdel-Hameed Badawy","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gopinath Chennupati","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gopinath Chennupati","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Stephan Eidenbenz","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephan Eidenbenz","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"last","author":{"id":null,"display_name":"Nandakishore Santhi","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nandakishore Santhi","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I10052268"],"apc_list":null,"apc_paid":null,"fwci":3.0448,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.91252485,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"60","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12238","display_name":"Green IT and Sustainability","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.6105999946594238},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6093999743461609},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5825999975204468},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5615000128746033},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5110999941825867},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4921000003814697},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4674000144004822},{"id":"https://openalex.org/keywords/power-consumption","display_name":"Power consumption","score":0.42160001397132874}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8215000033378601},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.6105999946594238},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6093999743461609},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5825999975204468},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5615000128746033},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5482000112533569},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5110999941825867},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4921000003814697},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4674000144004822},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.42160001397132874},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.41780000925064087},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.390500009059906},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.34529998898506165},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3387902.3392613","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3387902.3392613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Computing Frontiers","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2002.07795","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.07795","pdf_url":"https://arxiv.org/pdf/2002.07795","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2002.07795","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.07795","pdf_url":"https://arxiv.org/pdf/2002.07795","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1656664476","https://openalex.org/W1874230839","https://openalex.org/W1968902482","https://openalex.org/W1977661221","https://openalex.org/W1986989509","https://openalex.org/W2033597569","https://openalex.org/W2038924755","https://openalex.org/W2066804546","https://openalex.org/W2069590084","https://openalex.org/W2070310479","https://openalex.org/W2092340922","https://openalex.org/W2093043622","https://openalex.org/W2122002474","https://openalex.org/W2131218797","https://openalex.org/W2226694076","https://openalex.org/W2522820499","https://openalex.org/W2580538010","https://openalex.org/W2762578638","https://openalex.org/W2807317340","https://openalex.org/W2887659847","https://openalex.org/W2906684012","https://openalex.org/W2921788688","https://openalex.org/W2933483662","https://openalex.org/W2947179428","https://openalex.org/W2991330024","https://openalex.org/W2998901775","https://openalex.org/W3039972296","https://openalex.org/W4240739471","https://openalex.org/W4248228503","https://openalex.org/W4250244364"],"related_works":[],"abstract_inverted_index":{"GPUs":[0,69,140],"are":[1],"prevalent":[2],"in":[3,18,51,162],"modern":[4,52],"computing":[5],"systems":[6],"at":[7],"all":[8,147],"scales.":[9],"They":[10],"consume":[11],"a":[12,130],"significant":[13],"fraction":[14],"of":[15,29,33,46,60,84,93,146,155,173],"the":[16,26,30,43,82,85,90,104,125,142,148,152,156],"energy":[17,44,91,144,171],"these":[19,120],"systems.":[20],"However,":[21],"vendors":[22],"do":[23],"not":[24],"publish":[25],"actual":[27],"cost":[28],"power/energy":[31],"overhead":[32],"their":[34],"internal":[35],"microarchitecture.":[36,166],"In":[37],"this":[38],"paper,":[39],"we":[40,80,123],"accurately":[41],"measure":[42],"consumption":[45,92],"various":[47],"PTX":[48],"instructions":[49,64],"found":[50],"NVIDIA":[53,68,164],"GPUs.":[54],"We":[55,96],"provide":[56,115],"an":[57,116],"exhaustive":[58],"comparison":[59,118],"more":[61],"than":[62],"40":[63],"for":[65,151],"four":[66,71],"high-end":[67],"from":[70],"different":[72,99,153],"generations":[73,150],"(Maxwell,":[74],"Pascal,":[75],"Volta,":[76],"and":[77,114,179],"Turing).":[78],"Furthermore,":[79],"show":[81,137],"effect":[83],"CUDA":[86],"compiler":[87],"optimizations":[88],"on":[89],"each":[94],"instruction.":[95],"use":[97,110],"three":[98],"software":[100,126],"techniques":[101,128],"to":[102],"read":[103],"GPU":[105,175],"on-chip":[106],"power":[107,133],"sensors,":[108],"which":[109],"NVIDIA's":[111],"NVML":[112],"API":[113],"in-depth":[117],"between":[119],"techniques.":[121],"Additionally,":[122],"verified":[124],"measurement":[127],"against":[129],"custom-designed":[131],"hardware":[132],"measurement.":[134],"The":[135],"results":[136],"that":[138],"Volta":[139],"have":[141],"best":[143],"efficiency":[145],"other":[149],"categories":[154],"instructions.":[157],"This":[158],"work":[159],"should":[160,168],"aid":[161],"understanding":[163],"GPUs'":[165],"It":[167],"also":[169],"make":[170],"measurements":[172],"any":[174],"kernel":[176],"both":[177],"efficient":[178],"accurate.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-02-24T00:00:00"}
