{"id":"https://openalex.org/W7161249719","doi":"https://doi.org/10.48550/arxiv.2605.14249","title":"EnergyLens: Predictive Energy-Aware Exploration for Multi-GPU LLM Inference Optimization","display_name":"EnergyLens: Predictive Energy-Aware Exploration for Multi-GPU LLM Inference Optimization","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161249719","doi":"https://doi.org/10.48550/arxiv.2605.14249"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.14249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.14249","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050263836","display_name":"Zhiye Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Zhiye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136214192","display_name":"Kyungmi Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Kyungmi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136239501","display_name":"Eun Kyung Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Eun Kyung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136263281","display_name":"Xin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136191498","display_name":"Tamar Eilam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eilam, Tamar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136259317","display_name":"Anantha P. Chandrakasan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chandrakasan, Anantha P.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19429999589920044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19429999589920044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.18850000202655792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.14820000529289246,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6651999950408936},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6402000188827515},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5946999788284302},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.49950000643730164},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.39160001277923584},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.35690000653266907}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7199000120162964},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6651999950408936},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6402000188827515},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5946999788284302},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.49950000643730164},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46799999475479126},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.39160001277923584},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37139999866485596},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.35690000653266907},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C66204764","wikidata":"https://www.wikidata.org/wiki/Q219416","display_name":"Sustainability","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.25529998540878296},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.14249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.14249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.43414339423179626,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,107,165],"present":[1],"EnergyLens,":[2],"an":[3,78,98],"end-to-end":[4],"framework":[5],"for":[6,26,55,62,104,129,140],"energy-aware":[7],"large":[8],"language":[9],"model":[10,103],"(LLM)":[11],"inference":[12],"optimization.":[13],"As":[14,49],"LLMs":[15],"scale,":[16],"predicting":[17],"and":[18,28,38,61,89,97,112,116,127,132,135,150,158,161],"reducing":[19],"their":[20],"energy":[21,47,102,152],"footprint":[22],"has":[23],"become":[24],"critical":[25],"sustainability":[27],"datacenter":[29],"operations,":[30],"yet":[31],"existing":[32,65],"approaches":[33],"either":[34],"require":[35],"production-level":[36],"code":[37],"expensive":[39],"profiling":[40,70],"or":[41],"fail":[42],"to":[43,59,148,173],"accurately":[44],"capture":[45],"multi-GPU":[46,105,130],"behavior.":[48],"a":[50],"result,":[51],"practitioners":[52],"lack":[53],"tools":[54],"deciding":[56],"which":[57],"optimizations":[58],"prioritize":[60],"selecting":[63],"among":[64],"deployment":[66],"configurations":[67,155],"when":[68],"exhaustive":[69],"is":[71,171],"impractical.":[72],"EnergyLens":[73,109,179],"addresses":[74],"this":[75],"gap":[76],"with":[77,93,175],"intuitive":[79],"einsum-based":[80],"interface":[81],"that":[82,168],"captures":[83],"LLM":[84],"specifications":[85],"including":[86],"fusion,":[87],"parallelism,":[88],"compute-communication":[90,169],"overlap,":[91],"combined":[92],"load-imbalance-aware":[94],"MoE":[95],"modeling":[96],"empirically":[99],"driven":[100],"communication":[101],"settings.":[106],"validate":[108],"on":[110],"Llama3":[111],"Qwen3-MoE":[113],"across":[114,137,154],"tensor-parallel":[115],"expert-parallel":[117],"configurations,":[118],"achieving":[119],"mean":[120],"absolute":[121],"percentage":[122],"errors":[123],"(MAPEs)":[124],"between":[125],"9.25%":[126],"13.19%":[128],"prefill":[131,157],"decode":[133,159],"energy,":[134],"12.97%":[136],"SM":[138],"allocations":[139],"Megatron-style":[141],"overlap.":[142],"Our":[143],"energy-driven":[144],"exploration":[145],"reveals":[146],"up":[147],"1.47x":[149],"52.9x":[151],"variation":[153],"in":[156],"efficiency":[160],"motivates":[162],"distributed":[163],"serving.":[164],"further":[166],"show":[167],"overlap":[170,183],"difficult":[172],"optimize":[174],"intuition":[176],"alone,":[177],"but":[178],"correctly":[180],"identifies":[181],"Pareto-optimal":[182],"configurations.":[184]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
