{"id":"https://openalex.org/W4390188279","doi":"https://doi.org/10.1109/hpec58863.2023.10363476","title":"Build Energy-Efficient GPU Computing Environment for Machine Learning Algorithms with Register File Packing Technique","display_name":"Build Energy-Efficient GPU Computing Environment for Machine Learning Algorithms with Register File Packing Technique","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4390188279","doi":"https://doi.org/10.1109/hpec58863.2023.10363476"},"language":"en","primary_location":{"id":"doi:10.1109/hpec58863.2023.10363476","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/hpec58863.2023.10363476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100327844","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8639-3818"},"institutions":[{"id":"https://openalex.org/I184840846","display_name":"Virginia Commonwealth University","ror":"https://ror.org/02nkdxk79","country_code":"US","type":"education","lineage":["https://openalex.org/I184840846"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xin Wang","raw_affiliation_strings":["Virginia Commonwealth University,Department of Electrical and Computer Engineering,Richmond,VA,23284"],"affiliations":[{"raw_affiliation_string":"Virginia Commonwealth University,Department of Electrical and Computer Engineering,Richmond,VA,23284","institution_ids":["https://openalex.org/I184840846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100441493","display_name":"Wei Zhang","orcid":"https://orcid.org/0009-0007-7082-1014"},"institutions":[{"id":"https://openalex.org/I142740786","display_name":"University of Louisville","ror":"https://ror.org/01ckdn478","country_code":"US","type":"education","lineage":["https://openalex.org/I142740786"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["University of Louisville,Department of Electrical and Computer Engineering,Louisville,KY,40292"],"affiliations":[{"raw_affiliation_string":"University of Louisville,Department of Electrical and Computer Engineering,Louisville,KY,40292","institution_ids":["https://openalex.org/I142740786"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100327844"],"corresponding_institution_ids":["https://openalex.org/I184840846"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19362437,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8107441663742065},{"id":"https://openalex.org/keywords/register-file","display_name":"Register file","score":0.7989202737808228},{"id":"https://openalex.org/keywords/operand","display_name":"Operand","score":0.7937122583389282},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.6773793697357178},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5263344645500183},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.47945037484169006},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.43350130319595337},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.42262518405914307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38497063517570496},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.22936859726905823},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.16153311729431152}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8107441663742065},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.7989202737808228},{"id":"https://openalex.org/C55526617","wikidata":"https://www.wikidata.org/wiki/Q719375","display_name":"Operand","level":2,"score":0.7937122583389282},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6773793697357178},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5263344645500183},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47945037484169006},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.43350130319595337},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.42262518405914307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38497063517570496},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.22936859726905823},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.16153311729431152},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec58863.2023.10363476","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/hpec58863.2023.10363476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8700000047683716,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1935978687","https://openalex.org/W1979527452","https://openalex.org/W2043083835","https://openalex.org/W2072768743","https://openalex.org/W2115157620","https://openalex.org/W2119144962","https://openalex.org/W2128022558","https://openalex.org/W2155503253","https://openalex.org/W2194775991","https://openalex.org/W2279098554","https://openalex.org/W2495425901","https://openalex.org/W2499540656","https://openalex.org/W2516141709","https://openalex.org/W2534060050","https://openalex.org/W2618530766","https://openalex.org/W2902700387","https://openalex.org/W2963981420","https://openalex.org/W2964217848","https://openalex.org/W3006997840","https://openalex.org/W3117272675","https://openalex.org/W3148444620","https://openalex.org/W4234833047","https://openalex.org/W6637373629","https://openalex.org/W6677580257","https://openalex.org/W6695314431","https://openalex.org/W6723181079","https://openalex.org/W6724998850"],"related_works":["https://openalex.org/W2105325426","https://openalex.org/W2245451942","https://openalex.org/W2088203538","https://openalex.org/W2116521658","https://openalex.org/W1580009706","https://openalex.org/W2100504584","https://openalex.org/W2763549657","https://openalex.org/W2123413169","https://openalex.org/W4246431097","https://openalex.org/W4248763955"],"abstract_inverted_index":{"Popular":[0],"machine":[1,42,59,82,115,178],"learning":[2,43,60,83,116,179],"algorithms":[3,61,84],"built":[4],"with":[5,62,175],"a":[6,32,63,89,108,127],"mass":[7],"of":[8,80,92,111,152,162,170],"matrix":[9],"multiplications":[10],"can":[11,137],"be":[12,138],"well":[13],"paralleled":[14],"and":[15,148,200],"the":[16,26,37,101,105,131,134,159,163,167,186,191],"GPUs":[17,30],"are":[18,85],"desirable":[19],"computing":[20,55],"environment":[21,56],"for":[22,41,57,114],"these":[23],"applications.":[24],"However,":[25],"energy":[27,150,168,194],"consumption":[28,151,195],"on":[29,203],"becomes":[31],"big":[33],"concern":[34],"which":[35],"prevents":[36],"further":[38,139],"performance":[39],"increases":[40],"algorithms.":[44,117,180],"In":[45],"this":[46,171],"work,":[47],"we":[48,75,98,142],"aim":[49],"to":[50,120,126,144,198],"build":[51],"an":[52],"energy-efficient":[53],"GPU":[54,64,172,193],"famous":[58],"register":[65,132,187],"file":[66],"management":[67,174],"theory":[68],"named":[69],"narrow":[70,123],"width":[71,124],"operand":[72],"packing.":[73],"First,":[74],"observed":[76],"that":[77,100,185],"RF":[78,94,106,135,154,173],"occupancies":[79,136],"modern":[81],"relatively":[86],"low":[87],"leaving":[88],"great":[90],"waste":[91],"GPU's":[93,153],"leakage":[95],"energy.":[96],"Second,":[97],"found":[99],"data":[102],"maintained":[103],"by":[104,155],"contains":[107],"large":[109],"fraction":[110],"narrow-width":[112],"operands":[113,125],"We":[118,165],"proposed":[119],"pack":[121],"multiple":[122],"single":[128],"register.":[129],"After":[130],"packing,":[133],"reduced.":[140],"Finally,":[141],"attempt":[143],"save":[145],"both":[146],"static":[147],"dynamic":[149],"smartly":[156],"shutting":[157],"down":[158],"unused":[160],"portion":[161],"RF.":[164],"evaluated":[166],"reduction":[169],"five":[176],"state-of-the-art":[177],"The":[181],"experimental":[182],"results":[183],"show":[184],"packing":[188],"techniques":[189],"achieve":[190],"total":[192],"reduction,":[196],"up":[197],"14.14%":[199],"10.71":[201],"%":[202],"average.":[204]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
