{"id":"https://openalex.org/W4414290345","doi":"https://doi.org/10.1145/3767742","title":"Sustainable LLM Inference for Edge AI: Evaluating Quantized LLMs for Energy Efficiency, Output Accuracy, and Inference Latency","display_name":"Sustainable LLM Inference for Edge AI: Evaluating Quantized LLMs for Energy Efficiency, Output Accuracy, and Inference Latency","publication_year":2025,"publication_date":"2025-09-17","ids":{"openalex":"https://openalex.org/W4414290345","doi":"https://doi.org/10.1145/3767742"},"language":"en","primary_location":{"id":"doi:10.1145/3767742","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767742","pdf_url":null,"source":{"id":"https://openalex.org/S4210175912","display_name":"ACM Transactions on Internet of Things","issn_l":"2577-6207","issn":["2577-6207","2691-1914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Internet of Things","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3767742","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002668348","display_name":"Erik Johannes Husom","orcid":"https://orcid.org/0000-0002-9325-1604"},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]},{"id":"https://openalex.org/I4387930215","display_name":"SINTEF Digital","ror":"https://ror.org/028m52w57","country_code":null,"type":"facility","lineage":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Erik Johannes Husom","raw_affiliation_strings":["SINTEF Digital","SINTEF Digital, Oslo, Norway"],"raw_orcid":"https://orcid.org/0000-0002-9325-1604","affiliations":[{"raw_affiliation_string":"SINTEF Digital","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]},{"raw_affiliation_string":"SINTEF Digital, Oslo, Norway","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087161990","display_name":"Arda G\u00f6knil","orcid":"https://orcid.org/0000-0002-2170-2066"},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]},{"id":"https://openalex.org/I4387930215","display_name":"SINTEF Digital","ror":"https://ror.org/028m52w57","country_code":null,"type":"facility","lineage":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Arda Goknil","raw_affiliation_strings":["SINTEF Digital","SINTEF Digital, Oslo Norway"],"raw_orcid":"https://orcid.org/0000-0002-2170-2066","affiliations":[{"raw_affiliation_string":"SINTEF Digital","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]},{"raw_affiliation_string":"SINTEF Digital, Oslo Norway","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068418151","display_name":"Merve Astekin","orcid":"https://orcid.org/0000-0003-4181-963X"},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]},{"id":"https://openalex.org/I4387930215","display_name":"SINTEF Digital","ror":"https://ror.org/028m52w57","country_code":null,"type":"facility","lineage":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Merve Astekin","raw_affiliation_strings":["SINTEF Digital","SINTEF Digital, Oslo Norway"],"raw_orcid":"https://orcid.org/0000-0003-4181-963X","affiliations":[{"raw_affiliation_string":"SINTEF Digital","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]},{"raw_affiliation_string":"SINTEF Digital, Oslo Norway","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029828965","display_name":"Lwin Khin Shar","orcid":"https://orcid.org/0000-0001-5130-0407"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Lwin Khin Shar","raw_affiliation_strings":["Singapore Management University","Singapore Management University, Singapore Singapore"],"raw_orcid":"https://orcid.org/0000-0001-5130-0407","affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]},{"raw_affiliation_string":"Singapore Management University, Singapore Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065726341","display_name":"Andre K\u00e5sen","orcid":null},"institutions":[{"id":"https://openalex.org/I184531372","display_name":"OsloMet \u2013 Oslo Metropolitan University","ror":"https://ror.org/04q12yn84","country_code":"NO","type":"education","lineage":["https://openalex.org/I184531372"]},{"id":"https://openalex.org/I4210148693","display_name":"Metropolitan University","ror":"https://ror.org/04hdrrs71","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210148693"]}],"countries":["BD","NO"],"is_corresponding":false,"raw_author_name":"Andre K\u00c3\u00a5sen","raw_affiliation_strings":["Oslo Metropolitan University","Oslo Metropolitan University, Oslo, Norway"],"raw_orcid":"https://orcid.org/0009-0007-3745-9113","affiliations":[{"raw_affiliation_string":"Oslo Metropolitan University","institution_ids":["https://openalex.org/I4210148693"]},{"raw_affiliation_string":"Oslo Metropolitan University, Oslo, Norway","institution_ids":["https://openalex.org/I184531372"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065907664","display_name":"Sagar Sen","orcid":"https://orcid.org/0000-0002-5784-7355"},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]},{"id":"https://openalex.org/I4387930215","display_name":"SINTEF Digital","ror":"https://ror.org/028m52w57","country_code":null,"type":"facility","lineage":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Sagar Sen","raw_affiliation_strings":["SINTEF Digital","SINTEF Digital, Oslo Norway"],"raw_orcid":"https://orcid.org/0000-0002-5784-7355","affiliations":[{"raw_affiliation_string":"SINTEF Digital","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]},{"raw_affiliation_string":"SINTEF Digital, Oslo Norway","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107328995","display_name":"Benedikt Andreas Mithassel","orcid":null},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]},{"id":"https://openalex.org/I4387930215","display_name":"SINTEF Digital","ror":"https://ror.org/028m52w57","country_code":null,"type":"facility","lineage":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Benedikt Andreas Mithassel","raw_affiliation_strings":["SINTEF Digital","SINTEF Digital, Oslo Norway"],"raw_orcid":"https://orcid.org/0009-0007-5875-4023","affiliations":[{"raw_affiliation_string":"SINTEF Digital","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]},{"raw_affiliation_string":"SINTEF Digital, Oslo Norway","institution_ids":["https://openalex.org/I173888879","https://openalex.org/I4387930215"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012429784","display_name":"Ahmet Soylu","orcid":"https://orcid.org/0000-0001-6034-4137"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]},{"id":"https://openalex.org/I2800207870","display_name":"H\u00f8yskolen Kristiania","ror":"https://ror.org/03gss5916","country_code":"NO","type":"education","lineage":["https://openalex.org/I2800207870"]}],"countries":["KR","NO"],"is_corresponding":false,"raw_author_name":"Ahmet Soylu","raw_affiliation_strings":["Kristiania University College","Seoul National University","Seoul National University, Seoul Republic of Korea","Kristiania University College, Oslo, Norway"],"raw_orcid":"https://orcid.org/0000-0001-6034-4137","affiliations":[{"raw_affiliation_string":"Kristiania University College","institution_ids":["https://openalex.org/I2800207870"]},{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Seoul National University, Seoul Republic of Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Kristiania University College, Oslo, Norway","institution_ids":["https://openalex.org/I2800207870"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":33.7581,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.99668444,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"6","issue":"4","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9312000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.8521000146865845},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7649999856948853},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4246000051498413},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4162999987602234},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.38199999928474426},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.37130001187324524},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.3537999987602234},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3492000102996826}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8521000146865845},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7649999856948853},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6647999882698059},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4300000071525574},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4246000051498413},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4162999987602234},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4117000102996826},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.37130001187324524},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3537999987602234},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3492000102996826},{"id":"https://openalex.org/C186108316","wikidata":"https://www.wikidata.org/wiki/Q352530","display_name":"Adaptive neuro fuzzy inference system","level":4,"score":0.30480000376701355},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28540000319480896},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.265500009059906}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3767742","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767742","pdf_url":null,"source":{"id":"https://openalex.org/S4210175912","display_name":"ACM Transactions on Internet of Things","issn_l":"2577-6207","issn":["2577-6207","2691-1914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Internet of Things","raw_type":"journal-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11491","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10489","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1145/3767742","raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1145/3767742","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767742","pdf_url":null,"source":{"id":"https://openalex.org/S4210175912","display_name":"ACM Transactions on Internet of Things","issn_l":"2577-6207","issn":["2577-6207","2691-1914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Internet of Things","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W4206688953","https://openalex.org/W4281651027","https://openalex.org/W4288092057","https://openalex.org/W4297948009","https://openalex.org/W4392943591","https://openalex.org/W4395680645","https://openalex.org/W4399397061","https://openalex.org/W4399449628","https://openalex.org/W4400579220","https://openalex.org/W4400728225","https://openalex.org/W4401910477","https://openalex.org/W4402350363","https://openalex.org/W4403413375","https://openalex.org/W4403421327","https://openalex.org/W4404134026","https://openalex.org/W4405887239","https://openalex.org/W4406650295","https://openalex.org/W4408146288","https://openalex.org/W4408324844","https://openalex.org/W4412888943"],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"Large":[1],"Language":[2],"Models":[3,97],"(LLMs)":[4],"on":[5,70,100,173],"edge":[6,72],"devices":[7],"presents":[8],"significant":[9],"challenges":[10],"due":[11],"to":[12,30,120],"computational":[13,40],"constraints,":[14],"memory":[15],"limitations,":[16],"inference":[17,34,85,133],"speed,":[18,134],"and":[19,39,65,87,94,109,111,135],"energy":[20,83,117,131,153],"consumption.":[21,124],"Model":[22],"quantization":[23,67,92,139],"has":[24],"emerged":[25],"as":[26],"a":[27,47,114,167],"key":[28],"technique":[29],"enable":[31],"efficient":[32],"LLM":[33,145,156,175],"by":[35,60],"reducing":[36],"model":[37],"size":[38],"overhead.":[41],"In":[42],"this":[43,158],"study,":[44],"we":[45,112],"conduct":[46],"comprehensive":[48],"analysis":[49],"of":[50],"28":[51],"quantized":[52],"LLMs":[53],"from":[54],"the":[55,128],"Ollama":[56],"library,":[57],"which":[58],"applies":[59],"default":[61],"Post-Training":[62],"Quantization":[63],"(PTQ)":[64],"weight-only":[66],"techniques,":[68],"deployed":[69],"an":[71],"device":[73],"(Raspberry":[74],"Pi":[75],"4":[76,78],"with":[77,155],"GB":[79],"RAM).":[80],"We":[81],"evaluate":[82],"efficiency,":[84,132],"performance,":[86],"output":[88],"accuracy":[89,136],"across":[90],"multiple":[91],"levels":[93],"task":[95],"types.":[96],"are":[98],"benchmarked":[99],"five":[101],"standardized":[102],"datasets":[103],"(CommonsenseQA,":[104],"BIG-Bench":[105],"Hard,":[106],"TruthfulQA,":[107],"GSM8K,":[108],"HumanEval),":[110],"employ":[113],"high-resolution,":[115],"hardware-based":[116],"measurement":[118],"tool":[119],"capture":[121],"real-world":[122],"power":[123],"Our":[125],"findings":[126],"reveal":[127],"trade-offs":[129],"between":[130],"in":[137,170],"different":[138],"settings,":[140],"highlighting":[141],"configurations":[142],"that":[143],"optimize":[144],"deployment":[146],"for":[147,163],"resource-constrained":[148],"environments.":[149],"By":[150],"integrating":[151],"hardware-level":[152],"profiling":[154],"benchmarking,":[157],"study":[159],"provides":[160],"actionable":[161],"insights":[162],"sustainable":[164],"AI,":[165],"bridging":[166],"critical":[168],"gap":[169],"existing":[171],"research":[172],"energy-aware":[174],"deployment.":[176]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":9}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
