{"id":"https://openalex.org/W7134165539","doi":"https://doi.org/10.1109/bigdata66926.2025.11400739","title":"Towards Uncertainty-Aware Low-Bit Quantized LLMs for On-Device Inference","display_name":"Towards Uncertainty-Aware Low-Bit Quantized LLMs for On-Device Inference","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7134165539","doi":"https://doi.org/10.1109/bigdata66926.2025.11400739"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata66926.2025.11400739","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata66926.2025.11400739","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086733781","display_name":"Lorenz Sparrenberg","orcid":"https://orcid.org/0000-0001-9450-7387"},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lorenz Sparrenberg","raw_affiliation_strings":["University of Bonn,Bonn,Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn,Bonn,Germany","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039998519","display_name":"Th. Schneider","orcid":null},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Schneider","raw_affiliation_strings":["University of Bonn,Bonn,Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn,Bonn,Germany","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003368121","display_name":"Tobias Deu\u00dfer","orcid":"https://orcid.org/0000-0003-4685-0847"},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Deu\u00dfer","raw_affiliation_strings":["University of Bonn,Bonn,Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn,Bonn,Germany","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128365892","display_name":"Armin Berger","orcid":null},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Armin Berger","raw_affiliation_strings":["University of Bonn,Bonn,Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn,Bonn,Germany","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123498705","display_name":"Rafet Sifa","orcid":null},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Rafet Sifa","raw_affiliation_strings":["University of Bonn,Bonn,Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn,Bonn,Germany","institution_ids":["https://openalex.org/I135140700"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086733781"],"corresponding_institution_ids":["https://openalex.org/I135140700"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86357344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5930","last_page":"5939"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2671000063419342,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2671000063419342,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.10949999839067459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.0608999989926815,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3569999933242798},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.2578999996185303},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.2540999948978424},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.2506999969482422},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.25060001015663147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48750001192092896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3937999904155731},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29170000553131104},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2624000012874603},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.2506999969482422},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25060001015663147},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.24959999322891235}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata66926.2025.11400739","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata66926.2025.11400739","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},{"id":"pmh:oai:bonndoc.ulb.uni-bonn.de:20.500.11811/13993","is_oa":false,"landing_page_url":"https://hdl.handle.net/20.500.11811/13993","pdf_url":null,"source":{"id":"https://openalex.org/S4306402493","display_name":"bonndoc (University of Bonn)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I135140700","host_organization_name":"University of Bonn","host_organization_lineage":["https://openalex.org/I135140700"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"doc-type:conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W4290994954","https://openalex.org/W4385227045","https://openalex.org/W4385570446","https://openalex.org/W4391494845","https://openalex.org/W4401042444","https://openalex.org/W4402671157","https://openalex.org/W4404781450","https://openalex.org/W4404792996","https://openalex.org/W4406457930"],"related_works":[],"abstract_inverted_index":{"Quantizing":[0],"large":[1],"language":[2],"models":[3,99,166],"(LLMs)":[4],"significantly":[5],"reduces":[6],"memory":[7],"usage":[8],"and":[9,23,72,113,129,170,201],"computational":[10],"requirements,":[11],"enabling":[12,157],"efficient":[13],"on-device":[14,188],"inference.":[15],"However,":[16],"aggressive":[17],"quantization":[18,108,146],"can":[19],"degrade":[20],"model":[21,40],"performance":[22],"exacerbate":[24],"prediction":[25],"uncertainty.":[26],"To":[27],"address":[28],"this":[29],"critical":[30],"issue,":[31],"we":[32,69,115],"propose":[33],"a":[34,45,49,58],"logits-based":[35],"calibration":[36],"strategy":[37],"where":[38],"the":[39,63,135],"is":[41],"restricted":[42],"to":[43,66,103,160],"generating":[44],"single":[46],"token":[47],"from":[48,101],"limited":[50],"set":[51],"of":[52,137,181],"predefined":[53],"decision":[54],"tokens.":[55],"By":[56],"applying":[57],"temperature-scaled":[59],"softmax":[60],"directly":[61,85],"on":[62],"logits":[64],"corresponding":[65],"these":[67],"tokens,":[68],"obtain":[70],"calibrated":[71],"interpretable":[73],"probability":[74],"distributions,":[75],"explicitly":[76],"circumventing":[77],"stochastic":[78],"methods":[79],"such":[80,167,196],"as":[81,148,168,197],"top-k":[82],"sampling":[83],"by":[84,95],"leveraging":[86],"deterministic":[87],"logit":[88],"values,":[89],"revealing":[90],"subtle":[91],"behavioral":[92],"shifts":[93],"caused":[94],"quantization.":[96],"Using":[97],"Qwen-2.5":[98],"ranging":[100],"7\\,B":[102],"72\\,B":[104],"parameters":[105],"at":[106],"various":[107],"levels":[109],"(2,":[110],"4,":[111],"6":[112],"8-bit),":[114],"evaluate":[116],"our":[117],"method":[118],"across":[119],"four":[120],"recently":[121],"released":[122],"benchmarks":[123],"encompassing":[124],"regression":[125],"(README++,":[126],"CompLex-ZH,":[127],"GIRAI)":[128],"classification":[130],"(DarkBench)":[131],"tasks.":[132,174],"Thus,":[133],"minimizing":[134],"risk":[136],"data":[138],"leakage":[139],"into":[140],"pre-training":[141],"data.":[142],"Results":[143],"indicate":[144],"moderate":[145],"(4-bit)":[147],"optimal,":[149],"particularly":[150],"when":[151],"combined":[152],"with":[153],"minimal":[154],"few-shot":[155],"prompting,":[156],"quantized":[158,184],"LLMs":[159,185],"closely":[161],"match":[162],"or":[163],"surpass":[164],"proprietary":[165],"GPT-4o":[169],"GPT-4.1":[171],"in":[172],"certain":[173],"Our":[175],"open-source":[176],"toolkit":[177],"facilitates":[178],"straightforward":[179],"deployment":[180],"reliable,":[182],"uncertainty-aware":[183],"for":[186,193],"privacy-preserving,":[187],"inference,":[189],"making":[190],"them":[191],"suitable":[192],"sensitive":[194],"settings":[195],"human-subject":[198],"economic":[199],"experiments":[200],"survey":[202],"analysis.":[203]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-09T00:00:00"}
