{"id":"https://openalex.org/W7140185737","doi":"https://doi.org/10.48550/arxiv.2603.20224","title":"Beyond Test-Time Compute Strategies: Advocating Energy-per-Token in LLM Inference","display_name":"Beyond Test-Time Compute Strategies: Advocating Energy-per-Token in LLM Inference","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7140185737","doi":"https://doi.org/10.48550/arxiv.2603.20224"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20224","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20224","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20224","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wilhelm, Patrick","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wilhelm, Patrick","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wittkopp, Thorsten","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wittkopp, Thorsten","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Kao, Odej","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kao, Odej","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19859999418258667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19859999418258667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.1251000016927719,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10740000009536743,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6589000225067139},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6243000030517578},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.48750001192092896},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4717999994754791},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4323999881744385},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.3587000072002411},{"id":"https://openalex.org/keywords/computational-model","display_name":"Computational model","score":0.3418999910354614}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7386999726295471},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6589000225067139},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6243000030517578},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5188999772071838},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.48750001192092896},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4717999994754791},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4323999881744385},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3847000002861023},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.2777999937534332}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20224","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20224","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20224","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20224","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.47305500507354736,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1,38],"Models":[2,39],"(LLMs)":[3],"demonstrate":[4],"exceptional":[5],"performance":[6,68],"across":[7],"diverse":[8],"tasks":[9],"but":[10],"come":[11],"with":[12,52,134],"substantial":[13],"energy":[14,84,125],"and":[15,29,181],"computational":[16,75],"costs,":[17,85],"particularly":[18],"in":[19,95,122,158],"request-heavy":[20],"scenarios.":[21],"In":[22],"many":[23],"real-world":[24],"applications,":[25],"the":[26,67,107,113],"full":[27],"scale":[28],"capabilities":[30],"of":[31,69,117],"LLMs":[32],"are":[33],"often":[34],"unnecessary,":[35],"as":[36,57,145],"Small":[37],"(SLMs)":[40],"can":[41,65,80],"provide":[42],"accurate":[43],"responses":[44],"for":[45,99,128,186],"simpler":[46],"text":[47],"generation":[48],"tasks.":[49],"When":[50],"enhanced":[51],"advanced":[53],"reasoning":[54,157,167],"strategies,":[55],"such":[56],"Chain-of-Thought":[58],"(CoT)":[59],"prompting":[60],"or":[61],"Majority":[62],"Voting,":[63],"SLMs":[64],"approach":[66],"larger":[70,104],"models":[71,101],"while":[72],"reducing":[73],"overall":[74],"requirements.":[76],"However,":[77],"these":[78,93],"strategies":[79,98,183],"also":[81],"introduce":[82],"additional":[83],"creating":[86],"an":[87],"energy-accuracy":[88],"trade-off.":[89],"Our":[90],"analysis":[91],"examines":[92],"trade-offs":[94],"test-time":[96],"compute":[97],"smaller":[100],"compared":[102],"to":[103,147,165],"ones,":[105],"using":[106,162],"MMLU":[108],"benchmark.":[109],"Additionally,":[110],"we":[111,138,154],"explore":[112],"input-output":[114],"token":[115,160],"dynamics":[116],"transformer":[118],"architectures,":[119],"which":[120],"result":[121],"nonlinear":[123],"hardware":[124],"operation":[126],"curves":[127,164],"LLMs.":[129],"To":[130],"bridge":[131],"AI":[132,188],"research":[133],"its":[135],"physical":[136],"impact,":[137],"propose":[139,155],"\\textit{energy":[140],"efficiency":[141],"metrics},":[142],"including":[143],"Energy-per-Token,":[144],"complements":[146],"traditional":[148],"accuracy":[149,185],"benchmarks.":[150],"Beyond":[151],"model":[152,179],"selection,":[153],"controlled":[156],"CoT":[159],"generation,":[161],"operating":[163],"regulate":[166],"depth":[168],"dynamically.":[169],"This":[170],"vision":[171],"integrates":[172],"a":[173],"energy-aware":[174],"routing":[175],"mechanism,":[176],"ensuring":[177],"that":[178],"selection":[180],"inference":[182],"balance":[184],"sustainable":[187],"deployment.":[189]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-25T00:00:00"}
