{"id":"https://openalex.org/W4417248846","doi":"https://doi.org/10.1109/tce.2025.3642866","title":"Efficient LLM Deployment in Consumer Electronics Applications via Resource-Aware and Salience-Guided Quantization","display_name":"Efficient LLM Deployment in Consumer Electronics Applications via Resource-Aware and Salience-Guided Quantization","publication_year":2025,"publication_date":"2025-12-11","ids":{"openalex":"https://openalex.org/W4417248846","doi":"https://doi.org/10.1109/tce.2025.3642866"},"language":null,"primary_location":{"id":"doi:10.1109/tce.2025.3642866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2025.3642866","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102594984","display_name":"Tianfu Pang","orcid":"https://orcid.org/0009-0007-8291-4437"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianfu Pang","raw_affiliation_strings":["College of Computer Science and Software Engineering, Hohai University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113881053","display_name":"Yingchi Mao","orcid":"https://orcid.org/0000-0002-9884-8100"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingchi Mao","raw_affiliation_strings":["College of Computer Science and Software Engineering, Hohai University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107468508","display_name":"Zhenxiang Pan","orcid":null},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenxiang Pan","raw_affiliation_strings":["College of Computer Science and Software Engineering, Hohai University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065225920","display_name":"Rongzhi Qi","orcid":"https://orcid.org/0009-0008-1181-0339"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongzhi Qi","raw_affiliation_strings":["College of Computer Science and Software Engineering, Hohai University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111027049","display_name":"Benteng Zhang","orcid":"https://orcid.org/0009-0006-6946-5254"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Benteng Zhang","raw_affiliation_strings":["College of Computer Science and Software Engineering, Hohai University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068227459","display_name":"Xiaoming He","orcid":"https://orcid.org/0000-0003-4196-3041"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming He","raw_affiliation_strings":["College of Internet of Things, Nanjing University of Posts and Telecommunications, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Internet of Things, Nanjing University of Posts and Telecommunications, Nanjing, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100600528","display_name":"Jie Wu","orcid":"https://orcid.org/0000-0002-3472-1717"},"institutions":[{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Wu","raw_affiliation_strings":["Center for Networked Computing, Temple University, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Center for Networked Computing, Temple University, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I84392919"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102594984"],"corresponding_institution_ids":["https://openalex.org/I163340411"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41028493,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"72","issue":"1","first_page":"2122","last_page":"2134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.47290000319480896,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.47290000319480896,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.047200001776218414,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8375999927520752},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.5663999915122986},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.510200023651123},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4778999984264374},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.4146000146865845},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3702999949455261},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3490999937057495},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.34389999508857727},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.3224000036716461}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8375999927520752},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7437999844551086},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.5663999915122986},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.510200023651123},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4778999984264374},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.415800005197525},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3702999949455261},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3091000020503998},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C56086750","wikidata":"https://www.wikidata.org/wiki/Q6042592","display_name":"Integer programming","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2840000092983246},{"id":"https://openalex.org/C127964446","wikidata":"https://www.wikidata.org/wiki/Q1092142","display_name":"Computational resource","level":3,"score":0.2777999937534332},{"id":"https://openalex.org/C95491727","wikidata":"https://www.wikidata.org/wiki/Q992968","display_name":"Mobile telephony","level":3,"score":0.27379998564720154},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C138331895","wikidata":"https://www.wikidata.org/wiki/Q11650","display_name":"Electronics","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.26339998841285706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tce.2025.3642866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2025.3642866","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"can":[4,163],"stimulate":[5],"intelligent":[6],"functions":[7],"on":[8,24,99,148,172],"mobile":[9,25,78,100],"devices":[10,26,37],"in":[11,55,191],"the":[12,74,178,185],"applications":[13],"of":[14,77,181],"consumer":[15],"electronics.":[16],"However,":[17],"it":[18],"is":[19,44],"challenging":[20],"to":[21,28,66,73,159,201],"deploy":[22],"LLMs":[23],"due":[27],"limited":[29],"memory":[30,97,117,196],"and":[31,89,96,118,124,193],"computational":[32,95],"resources":[33],"that":[34],"fluctuate":[35],"as":[36],"execute":[38],"multiple":[39],"AI":[40],"tasks":[41],"concurrently.":[42],"Quantization":[43,111],"a":[45,116,205],"widely":[46],"adopted":[47],"compression":[48,207],"technique,":[49],"but":[50],"faces":[51],"two":[52],"major":[53],"difficulties":[54],"practical":[56],"deployment:":[57],"<italic":[58,80],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[59,81],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1)</i>":[60],"Existing":[61],"quantization":[62,156,169,180],"methods":[63,85],"are":[64,140],"unable":[65],"select":[67],"suitable":[68],"models":[69],"for":[70,133,177],"deployment":[71],"according":[72],"resource":[75],"limitations":[76],"devices;":[79],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2)</i>":[82],"Mixed-precision":[83],"quantization-based":[84],"utilize":[86],"reinforcement":[87],"learning":[88],"element-level":[90],"quantization,":[91],"which":[92,162],"incurs":[93],"substantial":[94],"costs":[98],"devices.":[101],"To":[102],"this":[103],"end,":[104],"we":[105],"propose":[106],"Resource":[107],"Awareness-Based":[108],"Salience-Weighted":[109],"Mixed-Precision":[110],"(RAS-MQ).":[112],"Specifically,":[113],"RAS-MQ":[114,128,153,183],"constructs":[115],"latency":[119],"computation":[120],"incorporating":[121],"model":[122],"parameters":[123],"device":[125],"constraints,":[126],"then":[127],"employs":[129],"integer":[130],"linear":[131],"programming":[132],"optimal":[134],"per-layer":[135],"bit-width":[136],"allocation.":[137],"Weight":[138],"matrices":[139],"partitioned":[141],"into":[142],"groups":[143],"with":[144],"bit-widths":[145],"assigned":[146],"based":[147],"average":[149],"saliency":[150],"ranking.":[151],"Furthermore,":[152],"applies":[154],"training-free":[155],"range":[157],"clipping":[158],"salient":[160],"weights,":[161],"improve":[164],"representational":[165],"capacity":[166],"while":[167],"reducing":[168],"error.":[170],"Experiments":[171],"five":[173],"zero-shot":[174],"inference":[175],"datasets,":[176],"2-bit":[179],"LLaMA2-13B,":[182],"outperforms":[184],"state-of-the-art":[186],"baseline":[187],"(SqueezeLLM)":[188],"by":[189],"0.85%":[190],"accuracy":[192],"reduces":[194],"weight":[195],"consumption":[197],"from":[198],"24.5":[199],"GB":[200],"4.1":[202],"GB,":[203],"achieving":[204],"5.98\u00d7":[206],"ratio.":[208]},"counts_by_year":[],"updated_date":"2026-03-26T06:05:38.182114","created_date":"2025-12-11T00:00:00"}
