{"id":"https://openalex.org/W4409248836","doi":"https://doi.org/10.1109/hpca61900.2025.00103","title":"throttLL\u2019eM: Predictive GPU Throttling for Energy Efficient LLM Inference Serving","display_name":"throttLL\u2019eM: Predictive GPU Throttling for Energy Efficient LLM Inference Serving","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248836","doi":"https://doi.org/10.1109/hpca61900.2025.00103"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00103","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048124925","display_name":"Andreas Kosmas Kakolyris","orcid":"https://orcid.org/0009-0000-4915-8309"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Andreas Kosmas Kakolyris","raw_affiliation_strings":["ETH,Z&#x00FC;rich"],"affiliations":[{"raw_affiliation_string":"ETH,Z&#x00FC;rich","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028774103","display_name":"Dimosthenis Masouros","orcid":"https://orcid.org/0000-0001-6147-6908"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Dimosthenis Masouros","raw_affiliation_strings":["National Technical University of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006082842","display_name":"Petros Vavaroutsos","orcid":"https://orcid.org/0000-0003-1929-5649"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Petros Vavaroutsos","raw_affiliation_strings":["National Technical University of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076432415","display_name":"Sotirios Xydis","orcid":"https://orcid.org/0000-0003-3151-2730"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Sotirios Xydis","raw_affiliation_strings":["National Technical University of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043131021","display_name":"Dimitrios Soudris","orcid":"https://orcid.org/0000-0002-6930-6847"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Dimitrios Soudris","raw_affiliation_strings":["National Technical University of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048124925"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.8233,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.95706774,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1363","last_page":"1378"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9733999967575073,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9733999967575073,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9358999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bandwidth-throttling","display_name":"Bandwidth throttling","score":0.9279282689094543},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6940815448760986},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5929474234580994},{"id":"https://openalex.org/keywords/predictive-analytics","display_name":"Predictive analytics","score":0.42058566212654114},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.38888996839523315},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21416082978248596},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20853063464164734},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.06876319646835327}],"concepts":[{"id":"https://openalex.org/C173061102","wikidata":"https://www.wikidata.org/wiki/Q478819","display_name":"Bandwidth throttling","level":3,"score":0.9279282689094543},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6940815448760986},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5929474234580994},{"id":"https://openalex.org/C83209312","wikidata":"https://www.wikidata.org/wiki/Q1053367","display_name":"Predictive analytics","level":2,"score":0.42058566212654114},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.38888996839523315},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21416082978248596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20853063464164734},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.06876319646835327},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C131097465","wikidata":"https://www.wikidata.org/wiki/Q178898","display_name":"Gas compressor","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00103","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8799999952316284,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334322","display_name":"HORIZON EUROPE Framework Programme","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1495021188","https://openalex.org/W1678356000","https://openalex.org/W2295598076","https://openalex.org/W2966983573","https://openalex.org/W3011043556","https://openalex.org/W3043571714","https://openalex.org/W3096543861","https://openalex.org/W3097411828","https://openalex.org/W3112048118","https://openalex.org/W3130716829","https://openalex.org/W3159103505","https://openalex.org/W4206943884","https://openalex.org/W4280507006","https://openalex.org/W4283704460","https://openalex.org/W4321636575","https://openalex.org/W4360831842","https://openalex.org/W4384705353","https://openalex.org/W4385481791","https://openalex.org/W4387321091","https://openalex.org/W4388874804","https://openalex.org/W4390263770","https://openalex.org/W4394998727","https://openalex.org/W4395106452","https://openalex.org/W4395112660","https://openalex.org/W4399074329","https://openalex.org/W4401211704","https://openalex.org/W4402683901","https://openalex.org/W4404386015","https://openalex.org/W6730956707","https://openalex.org/W6739901393","https://openalex.org/W6754244489","https://openalex.org/W6755207826","https://openalex.org/W6765484274","https://openalex.org/W6768851824","https://openalex.org/W6784223070","https://openalex.org/W6785079351","https://openalex.org/W6792558049","https://openalex.org/W6811340617","https://openalex.org/W6811403424","https://openalex.org/W6838322825","https://openalex.org/W6846659131","https://openalex.org/W6847386241","https://openalex.org/W6849514112","https://openalex.org/W6849530321","https://openalex.org/W6850625674","https://openalex.org/W6850927664","https://openalex.org/W6852691820","https://openalex.org/W6853192989","https://openalex.org/W6853336479","https://openalex.org/W6854308872","https://openalex.org/W6857439638","https://openalex.org/W6859470353","https://openalex.org/W6860155063","https://openalex.org/W6860312956","https://openalex.org/W6860710830","https://openalex.org/W6861495178","https://openalex.org/W6861885378","https://openalex.org/W6862520094","https://openalex.org/W6863347513","https://openalex.org/W6864858179","https://openalex.org/W6865347877","https://openalex.org/W6870282523"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2393348554","https://openalex.org/W2349133582","https://openalex.org/W2010203168","https://openalex.org/W3151308896","https://openalex.org/W2015072665","https://openalex.org/W2069213387","https://openalex.org/W2377698884"],"abstract_inverted_index":{"As":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"gain":[5],"traction,":[6],"their":[7],"reliance":[8],"on":[9,130,134],"power-hungry":[10],"GPUs":[11],"places":[12],"ever-increasing":[13],"energy":[14,32,53,149,153],"demands,":[15],"raising":[16],"environmental":[17],"and":[18,63,77,104,120,151],"monetary":[19],"concerns.":[20],"Inference":[21],"dominates":[22],"LLM":[23,135],"workloads,":[24],"presenting":[25],"a":[26,49,81],"critical":[27],"challenge":[28],"for":[29],"providers:":[30],"minimizing":[31],"costs":[33],"under":[34,161],"Service-Level":[35],"Objectives":[36],"(SLOs)":[37],"that":[38,51,70,85,109,139],"ensure":[39],"optimal":[40],"user":[41],"experience.":[42],"In":[43],"this":[44],"paper,":[45],"we":[46],"present":[47],"throttLL\u2019eM,":[48],"framework":[50],"reduces":[52],"consumption":[54,150],"while":[55],"meeting":[56],"SLOs":[57,100],"through":[58],"the":[59,95,110],"use":[60],"of":[61,156],"instance":[62,105],"GPU":[64],"frequency":[65],"scaling.":[66],"throttLL\u2019eM":[67,91,140,169],"features":[68],"mechanisms":[69],"project":[71],"future":[72],"Key-Value":[73],"(KV)":[74],"cache":[75],"usage":[76],"batch":[78],"size.":[79],"Leveraging":[80],"Machine-Learning":[82],"(ML)":[83],"model":[84,113],"receives":[86],"these":[87],"projections":[88],"as":[89],"inputs,":[90],"manages":[92],"performance":[93,122],"at":[94,157,173],"iteration":[96,127],"level":[97],"to":[98,143,165],"satisfy":[99],"with":[101],"reduced":[102],"frequencies":[103],"sizes.":[106],"We":[107],"show":[108,138],"proposed":[111],"ML":[112],"achieves":[114,141],"$R^{2}$":[115],"scores":[116],"greater":[117],"than":[118,125],"0.97":[119],"miss-predicts":[121],"by":[123],"less":[124],"1":[126],"per":[128],"second":[129],"average.":[131],"Experimental":[132],"results":[133],"inference":[136],"traces":[137],"up":[142],"$\\mathbf{4":[144],"3.":[145],"8":[146],"\\%}$":[147],"lower":[148],"an":[152],"efficiency":[154],"improvement":[155],"least":[158],"$1.71":[159],"\\times$":[160],"SLOs,":[162],"when":[163],"compared":[164],"NVIDIA\u2019s":[166],"Triton":[167],"server.":[168],"is":[170],"publicly":[171],"available":[172],"https://github.com/WilliamBlaskowicz/throttLL-eM.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
