{"id":"https://openalex.org/W4399449628","doi":"https://doi.org/10.1109/access.2024.3409745","title":"Measuring and Improving the Energy Efficiency of Large Language Models Inference","display_name":"Measuring and Improving the Energy Efficiency of Large Language Models Inference","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4399449628","doi":"https://doi.org/10.1109/access.2024.3409745"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3409745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3409745","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3409745","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053272278","display_name":"Mauricio Fadel Argerich","orcid":"https://orcid.org/0009-0008-9348-8426"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mauricio Fadel Argerich","raw_affiliation_strings":["Escuela T&#x00E9;cnica Superior de Ingenieros Inform&#x00E1;ticos, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Escuela T&#x00E9;cnica Superior de Ingenieros Inform&#x00E1;ticos, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108531115","display_name":"Marta Pati\u00f1o-Mart\u0131\u0301nez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marta Pati\u00f1o-Mart\u00ednez","raw_affiliation_strings":["Escuela T&#x00E9;cnica Superior de Ingenieros Inform&#x00E1;ticos, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Escuela T&#x00E9;cnica Superior de Ingenieros Inform&#x00E1;ticos, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5053272278"],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":17.9156,"has_fulltext":false,"cited_by_count":52,"citation_normalized_percentile":{"value":0.99419188,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"80194","last_page":"80207"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7465896010398865},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6131756901741028},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.47475484013557434},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33163681626319885},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31782954931259155}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7465896010398865},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6131756901741028},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.47475484013557434},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33163681626319885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31782954931259155},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3409745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3409745","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ebfa4b5b98fc449aa0dc18ec9bc0b1c0","is_oa":true,"landing_page_url":"https://doaj.org/article/ebfa4b5b98fc449aa0dc18ec9bc0b1c0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 80194-80207 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3409745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3409745","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8999999761581421}],"awards":[{"id":"https://openalex.org/G5322475874","display_name":null,"funder_award_id":"101004480","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1977661221","https://openalex.org/W2109546606","https://openalex.org/W2541249978","https://openalex.org/W2793320545","https://openalex.org/W2794325560","https://openalex.org/W2896457183","https://openalex.org/W2900504162","https://openalex.org/W2979826702","https://openalex.org/W3037032032","https://openalex.org/W3118781290","https://openalex.org/W3206285609","https://openalex.org/W3208860256","https://openalex.org/W4205390421","https://openalex.org/W4224308101","https://openalex.org/W4284974526","https://openalex.org/W4287204036","https://openalex.org/W4287889735","https://openalex.org/W4302011534","https://openalex.org/W4307225507","https://openalex.org/W4308245305","https://openalex.org/W4308760226","https://openalex.org/W4311642023","https://openalex.org/W4322718253","https://openalex.org/W4362515116","https://openalex.org/W4384918448","https://openalex.org/W4385245566","https://openalex.org/W6749838110","https://openalex.org/W6755207826","https://openalex.org/W6769243733","https://openalex.org/W6773953490","https://openalex.org/W6788175385","https://openalex.org/W6794426595","https://openalex.org/W6803154024","https://openalex.org/W6810081322","https://openalex.org/W6839700459","https://openalex.org/W6844944907","https://openalex.org/W6847420240","https://openalex.org/W6850162387","https://openalex.org/W6851579256","https://openalex.org/W6853251322","https://openalex.org/W6854866820"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Recent":[0],"improvements":[1],"in":[2,10,18,42,63],"the":[3,11,77,98],"accuracy":[4,34],"of":[5,21,27,33,49,80,101,125],"machine":[6],"learning":[7],"(ML)":[8],"models":[9,50],"language":[12],"domain":[13],"have":[14,35,71],"propelled":[15],"their":[16,134,150],"use":[17],"a":[19,46,60],"multitude":[20],"products":[22],"and":[23,57,65,75,105,119,129,144,154],"services,":[24],"touching":[25],"millions":[26],"lives":[28],"daily.":[29],"These":[30],"new":[31,47],"levels":[32,147],"been":[36],"attained":[37],"mainly":[38],"through":[39],"exponential":[40],"growth":[41],"model":[43,122],"size,":[44,123],"creating":[45],"category":[48],"known":[51],"as":[52],"Large":[53],"Language":[54],"Models":[55],"(LLMs)":[56],"leading":[58],"to":[59,96,108,148],"substantial":[61],"increase":[62],"computing":[64],"energy":[66,78,99,110,135,152],"demands.":[67],"While":[68],"recent":[69],"studies":[70],"focused":[72],"on":[73],"measuring":[74],"improving":[76],"consumption":[79,100],"LLMs":[81,102,118],"during":[82,103],"training,":[83],"inference":[84,104,151],"has":[85],"received":[86],"little":[87],"attention.":[88],"In":[89,137],"this":[90],"article,":[91],"we":[92,114,139],"present":[93],"an":[94],"approach":[95],"profile":[97],"leverage":[106,140],"it":[107],"improve":[109],"efficiency.":[111],"For":[112],"this,":[113],"deploy":[115],"several":[116],"state-of-the-art":[117],"observe":[120],"how":[121],"number":[124],"layers,":[126],"parallelized":[127],"attention,":[128],"even":[130],"vocabulary":[131],"size":[132,143],"affect":[133],"consumption.":[136],"addition,":[138],"input":[141],"batch":[142],"different":[145],"quantization":[146],"optimize":[149],"efficiency":[153],"latency.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":13},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
