{"id":"https://openalex.org/W4390189088","doi":"https://doi.org/10.1109/hpec58863.2023.10363447","title":"From Words to Watts: Benchmarking the Energy Costs of Large Language Model Inference","display_name":"From Words to Watts: Benchmarking the Energy Costs of Large Language Model Inference","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4390189088","doi":"https://doi.org/10.1109/hpec58863.2023.10363447"},"language":"en","primary_location":{"id":"doi:10.1109/hpec58863.2023.10363447","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec58863.2023.10363447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108029794","display_name":"Siddharth Samsi","orcid":"https://orcid.org/0009-0005-4937-6054"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]},{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["MX","RU"],"is_corresponding":true,"raw_author_name":"Siddharth Samsi","raw_affiliation_strings":["MIT","Northeastern University"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]},{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624303","display_name":"Dan Zhao","orcid":"https://orcid.org/0000-0002-4484-6505"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dan Zhao","raw_affiliation_strings":["NYU"],"affiliations":[{"raw_affiliation_string":"NYU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102803650","display_name":"J.C. McDonald","orcid":"https://orcid.org/0009-0000-7517-6244"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Joseph McDonald","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100733204","display_name":"Baolin Li","orcid":"https://orcid.org/0000-0001-9778-1023"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baolin Li","raw_affiliation_strings":["NYU"],"affiliations":[{"raw_affiliation_string":"NYU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016624503","display_name":"Adam Michaleas","orcid":"https://orcid.org/0000-0001-7402-8303"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Adam Michaleas","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064275902","display_name":"Michael Jones","orcid":"https://orcid.org/0000-0001-5215-2346"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Michael Jones","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059180282","display_name":"William Bergeron","orcid":null},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"William Bergeron","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072108599","display_name":"Jeremy Kepner","orcid":"https://orcid.org/0000-0001-9668-2613"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Jeremy Kepner","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074406596","display_name":"Devesh Tiwari","orcid":"https://orcid.org/0000-0002-7253-2458"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Devesh Tiwari","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043450560","display_name":"Vijay Gadepally","orcid":"https://orcid.org/0000-0002-4598-2808"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Vijay Gadepally","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5108029794"],"corresponding_institution_ids":["https://openalex.org/I4210109586","https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":25.9085,"has_fulltext":false,"cited_by_count":151,"citation_normalized_percentile":{"value":0.99731204,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.8393771052360535},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7506796717643738},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6623561382293701},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.530913233757019},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4021453857421875},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3922950029373169},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.362833172082901},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.09366318583488464}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8393771052360535},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7506796717643738},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6623561382293701},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.530913233757019},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4021453857421875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3922950029373169},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.362833172082901},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.09366318583488464},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec58863.2023.10363447","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec58863.2023.10363447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2883672905","https://openalex.org/W2963809228","https://openalex.org/W2973727699","https://openalex.org/W3106525532","https://openalex.org/W3204998121","https://openalex.org/W4286892945","https://openalex.org/W4287889735","https://openalex.org/W4289828121","https://openalex.org/W4304192541","https://openalex.org/W4312968147","https://openalex.org/W4316116392","https://openalex.org/W4317882610","https://openalex.org/W4322153971","https://openalex.org/W4322718191","https://openalex.org/W4380769213","https://openalex.org/W4383346782","https://openalex.org/W4385245566","https://openalex.org/W6755207826","https://openalex.org/W6803096969"],"related_works":["https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2284465472","https://openalex.org/W2291782699","https://openalex.org/W1993948687","https://openalex.org/W2000169967","https://openalex.org/W4399363378"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,37,69],"(LLMs)":[3],"have":[4],"exploded":[5],"in":[6,26,76,89,181],"popularity":[7],"due":[8],"to":[9,73,119,172,198,212],"their":[10,96],"new":[11],"generative":[12],"capabilities":[13],"that":[14],"go":[15],"far":[16],"beyond":[17],"prior":[18],"state-of-the-art.":[19],"These":[20],"technologies":[21],"are":[22,70],"increasingly":[23],"being":[24],"leveraged":[25],"various":[27,90],"domains":[28],"such":[29],"as":[30],"law,":[31],"finance,":[32],"and":[33,45,87,108,123,132,141,166,170,183,222],"medicine.":[34],"However,":[35],"these":[36,67,81],"carry":[38],"significant":[39],"computational":[40,122,221],"challenges,":[41],"especially":[42],"the":[43,59,121,138,174,187,207,210,218],"compute":[44],"energy":[46,52,60,124,143,223],"costs":[47,53,61,144],"required":[48],"for":[49,101,179],"inference.":[50],"Inference":[51],"already":[54],"receive":[55],"less":[56],"attention":[57],"than":[58],"of":[62,95,126,137,145,148,159,177,189,209,220],"training":[63],"LLMs-despite":[64],"how":[65],"often":[66],"large":[68],"called":[71],"on":[72,156],"conduct":[74,133],"inference":[75,110,127,139,142,192,215],"reality":[77],"(e.g.,":[78],"ChatGPT).":[79],"As":[80],"state-of-the-art":[82,151],"LLMs":[83,180],"see":[84],"increasing":[85],"usage":[86],"deployment":[88],"domains,":[91],"a":[92,134],"better":[93],"understanding":[94],"resource":[97],"utilization":[98,125],"is":[99,206],"crucial":[100],"cost-savings,":[102],"scaling":[103],"performance,":[104],"efficient":[105],"hardware":[106],"usage,":[107],"optimal":[109],"strategies.":[111],"In":[112],"this":[113,226],"paper,":[114],"we":[115],"describe":[116],"experiments":[117],"conducted":[118],"study":[120,213],"with":[128],"LLMs.":[129],"We":[130,185],"benchmark":[131],"preliminary":[135],"analysis":[136],"performance":[140,216],"different":[146],"sizes":[147],"LLaMA-a":[149],"recent":[150],"LLM-developed":[152],"by":[153],"Meta":[154],"AI":[155],"two":[157,167],"generations":[158],"popular":[160],"GPUs":[161],"(NVIDIA":[162],"V100":[163],"&":[164],"A100)":[165],"datasets":[168],"(Alpaca":[169],"GSM8K)":[171],"reflect":[173],"diverse":[175],"set":[176],"tasks/benchmarks":[178],"research":[182],"practice.":[184],"present":[186],"results":[188],"multi-node,":[190],"multi-GPU":[191],"using":[193],"model":[194],"sharding":[195],"across":[196],"up":[197],"32":[199],"GPUs.":[200],"To":[201],"our":[202,204],"knowledge,":[203],"work":[205],"one":[208],"first":[211],"LLM":[214],"from":[217],"perspective":[219],"resources":[224],"at":[225],"scale.":[227]},"counts_by_year":[{"year":2026,"cited_by_count":23},{"year":2025,"cited_by_count":96},{"year":2024,"cited_by_count":31},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
