{"id":"https://openalex.org/W4360831782","doi":"https://doi.org/10.1109/hpca56546.2023.10070943","title":"Know Your Enemy To Save Cloud Energy: Energy-Performance Characterization of Machine Learning Serving","display_name":"Know Your Enemy To Save Cloud Energy: Energy-Performance Characterization of Machine Learning Serving","publication_year":2023,"publication_date":"2023-02-01","ids":{"openalex":"https://openalex.org/W4360831782","doi":"https://doi.org/10.1109/hpca56546.2023.10070943"},"language":"en","primary_location":{"id":"doi:10.1109/hpca56546.2023.10070943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072409365","display_name":"Junyeol Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Junyeol Yu","raw_affiliation_strings":["Sungkyunkwan University,Dept. of Computer Science and Engineering","Dept. of Computer Science and Engineering, Sungkyunkwan University"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University,Dept. of Computer Science and Engineering","institution_ids":["https://openalex.org/I848706"]},{"raw_affiliation_string":"Dept. of Computer Science and Engineering, Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101531766","display_name":"Jong-Seok Kim","orcid":"https://orcid.org/0000-0002-4560-2553"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongseok Kim","raw_affiliation_strings":["Sungkyunkwan University,Dept. of Computer Science and Engineering","Dept. of Computer Science and Engineering, Sungkyunkwan University"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University,Dept. of Computer Science and Engineering","institution_ids":["https://openalex.org/I848706"]},{"raw_affiliation_string":"Dept. of Computer Science and Engineering, Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062665275","display_name":"Euiseong Seo","orcid":"https://orcid.org/0000-0003-2103-8019"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Euiseong Seo","raw_affiliation_strings":["Sungkyunkwan University,Dept. of Computer Science and Engineering","Dept. of Computer Science and Engineering, Sungkyunkwan University"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University,Dept. of Computer Science and Engineering","institution_ids":["https://openalex.org/I848706"]},{"raw_affiliation_string":"Dept. of Computer Science and Engineering, Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072409365"],"corresponding_institution_ids":["https://openalex.org/I848706"],"apc_list":null,"apc_paid":null,"fwci":4.5889,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.94966546,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"842","last_page":"854"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.84278404712677},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7753225564956665},{"id":"https://openalex.org/keywords/frequency-scaling","display_name":"Frequency scaling","score":0.7445282340049744},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.6237592101097107},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5964289903640747},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5763065814971924},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.5309799313545227},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4055600166320801},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40514740347862244},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.23708948493003845},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1728142499923706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84278404712677},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7753225564956665},{"id":"https://openalex.org/C157742956","wikidata":"https://www.wikidata.org/wiki/Q3237776","display_name":"Frequency scaling","level":3,"score":0.7445282340049744},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.6237592101097107},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5964289903640747},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5763065814971924},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.5309799313545227},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4055600166320801},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40514740347862244},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.23708948493003845},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1728142499923706},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca56546.2023.10070943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8299999833106995,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W18104785","https://openalex.org/W1686810756","https://openalex.org/W1980489165","https://openalex.org/W1990962327","https://openalex.org/W2012975714","https://openalex.org/W2020267722","https://openalex.org/W2050127041","https://openalex.org/W2063244929","https://openalex.org/W2074084090","https://openalex.org/W2084110734","https://openalex.org/W2096661534","https://openalex.org/W2161125731","https://openalex.org/W2183341477","https://openalex.org/W2302255633","https://openalex.org/W2522820499","https://openalex.org/W2522956255","https://openalex.org/W2529165666","https://openalex.org/W2626312854","https://openalex.org/W2757024591","https://openalex.org/W2770775905","https://openalex.org/W2772948367","https://openalex.org/W2794670651","https://openalex.org/W2896457183","https://openalex.org/W2901549770","https://openalex.org/W2915018956","https://openalex.org/W2945148926","https://openalex.org/W2973657294","https://openalex.org/W3006451002","https://openalex.org/W3007494972","https://openalex.org/W3007855629","https://openalex.org/W3016842236","https://openalex.org/W3047371394","https://openalex.org/W3083385342","https://openalex.org/W3084790829","https://openalex.org/W3093818819","https://openalex.org/W3095488153","https://openalex.org/W3098742859","https://openalex.org/W3104528661","https://openalex.org/W3118922386","https://openalex.org/W3130689885","https://openalex.org/W3148457660","https://openalex.org/W3163287424","https://openalex.org/W3186289964","https://openalex.org/W4214894150","https://openalex.org/W4253914488","https://openalex.org/W4255681033","https://openalex.org/W6634438429","https://openalex.org/W6637373629","https://openalex.org/W6647313123","https://openalex.org/W6677973695","https://openalex.org/W6730956707","https://openalex.org/W6755207826","https://openalex.org/W6762871624","https://openalex.org/W6765484274","https://openalex.org/W6778371292","https://openalex.org/W6779103662","https://openalex.org/W6798686915","https://openalex.org/W6801603025"],"related_works":["https://openalex.org/W1967088250","https://openalex.org/W2388464034","https://openalex.org/W4214747436","https://openalex.org/W2953002963","https://openalex.org/W179829755","https://openalex.org/W2035968155","https://openalex.org/W2943912161","https://openalex.org/W2973058717","https://openalex.org/W2918887334","https://openalex.org/W4360831782"],"abstract_inverted_index":{"The":[0,27,116,146,182,218],"proportion":[1],"of":[2,32,57,88,97,112,119,130,143,157,186,233,250],"machine":[3],"learning":[4],"(ML)":[5],"inference":[6,38,90,180,203],"in":[7,70,92,103],"modern":[8],"cloud":[9],"workloads":[10,39],"is":[11,34],"rapidly":[12],"increasing,":[13],"and":[14,133,152,196,215],"graphic":[15],"processing":[16],"units":[17],"(GPUs)":[18],"are":[19],"the":[20,37,53,93,128,140,202,225,234],"most":[21],"preferred":[22],"computational":[23],"accelerators":[24],"for":[25,109,178],"it.":[26],"massively":[28],"parallel":[29],"computing":[30],"capability":[31],"GPUs":[33,96,132,159],"well-suited":[35],"to":[36,52,106,231],"but":[40],"consumes":[41],"more":[42],"power":[43,55,65,68],"than":[44],"conventional":[45],"CPUs.":[46],"Therefore,":[47],"GPU":[48,67,121,174,194,197],"servers":[49,245],"contribute":[50],"significantly":[51,124],"total":[54],"consumption":[56,237],"a":[58,110,120,172],"data":[59],"center.":[60],"However,":[61],"despite":[62],"their":[63,134],"heavy":[64],"consumption,":[66],"management":[69,176],"cloud-scale":[71,179,216,235],"has":[72],"not":[73],"yet":[74],"been":[75],"actively":[76],"researched.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81,170],"reveal":[82],"three":[83,247],"findings":[84],"about":[85],"energy":[86,104,117,236],"efficiency":[87,105,118],"ML":[89,113,241],"clusters":[91],"cloud.":[94],"\u2776":[95],"different":[98,248],"architectures":[99],"have":[100],"comparative":[101],"advantages":[102],"each":[107],"other":[108],"set":[111,122],"models.":[114],"\u2777":[115],"may":[123],"vary":[125],"depending":[126],"on":[127,167],"number":[129],"active":[131],"clock":[135,164,198],"frequencies":[136],"even":[137],"when":[138,238],"producing":[139],"same":[141],"level":[142,148],"throughput.":[144],"\u2778":[145],"service":[147,204],"objective(SLO)-blind":[149],"dynamic":[150],"voltage":[151],"frequency":[153],"scaling":[154,195,199],"(DVFS)":[155],"driver":[156],"commercial":[158],"maintain":[160],"an":[161],"immoderately":[162],"high":[163],"frequency.":[165],"Based":[166],"these":[168],"implications,":[169],"propose":[171],"hierarchical":[173],"resource":[175],"approach":[177,184,210],"services.":[181],"proposed":[183,226],"consists":[185],"energy-aware":[187],"cluster":[188],"allocation,":[189],"intra-cluster":[190],"node":[191],"scaling,":[192],"intra-node":[193],"schemes":[200,227],"considering":[201],"architecture":[205],"hierarchy.":[206],"We":[207],"evaluated":[208],"our":[209],"with":[211,220,243],"its":[212],"prototype":[213],"implementation":[214],"simulation.":[217],"evaluation":[219],"real-world":[221],"traces":[222],"showed":[223],"that":[224],"can":[228],"save":[229],"up":[230],"28.3%":[232],"serving":[239],"five":[240],"models":[242],"105":[244],"having":[246],"kinds":[249],"GPUs.":[251]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
