{"id":"https://openalex.org/W7148354463","doi":"https://doi.org/10.1109/asru65441.2025.11434626","title":"Identifying and Calibrating Overconfidence in Noisy Speech Recognition","display_name":"Identifying and Calibrating Overconfidence in Noisy Speech Recognition","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148354463","doi":"https://doi.org/10.1109/asru65441.2025.11434626"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434626","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434626","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111024464","display_name":"Mingyue Huo","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mingyue Huo","raw_affiliation_strings":["University of Illinois Urbana-Champaign,Department of Linguistics,Urbana,USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign,Department of Linguistics,Urbana,USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132798356","display_name":"Yuheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuheng Zhang","raw_affiliation_strings":["University of Illinois Urbana-Champaign,Department of Computer Science,Urbana,USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign,Department of Computer Science,Urbana,USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102013534","display_name":"Yan Tang","orcid":"https://orcid.org/0000-0003-1149-4272"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Tang","raw_affiliation_strings":["University of Illinois Urbana-Champaign,Department of Linguistics,Urbana,USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign,Department of Linguistics,Urbana,USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111024464"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87344449,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8841999769210815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8841999769210815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.04100000113248825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.009200000204145908,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overconfidence-effect","display_name":"Overconfidence effect","score":0.9426000118255615},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.6879000067710876},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5537999868392944},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5138999819755554},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4878000020980835},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.47040000557899475},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4336000084877014},{"id":"https://openalex.org/keywords/confidence-interval","display_name":"Confidence interval","score":0.35429999232292175}],"concepts":[{"id":"https://openalex.org/C51110983","wikidata":"https://www.wikidata.org/wiki/Q16503490","display_name":"Overconfidence effect","level":2,"score":0.9426000118255615},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.6879000067710876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6279000043869019},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5537999868392944},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5335000157356262},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5138999819755554},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4982999861240387},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4878000020980835},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.47040000557899475},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C100253034","wikidata":"https://www.wikidata.org/wiki/Q196372","display_name":"Systematic error","level":2,"score":0.34119999408721924},{"id":"https://openalex.org/C2909755999","wikidata":"https://www.wikidata.org/wiki/Q4751126","display_name":"Low Confidence","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C13662513","wikidata":"https://www.wikidata.org/wiki/Q5160087","display_name":"Confidence distribution","level":3,"score":0.26260000467300415},{"id":"https://openalex.org/C13944312","wikidata":"https://www.wikidata.org/wiki/Q7512748","display_name":"Signal-to-noise ratio (imaging)","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C2984485829","wikidata":"https://www.wikidata.org/wiki/Q100766421","display_name":"Noise level","level":3,"score":0.2565000057220459},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434626","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434626","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5616424679756165}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W34303869","https://openalex.org/W38194800","https://openalex.org/W1517348348","https://openalex.org/W2034537249","https://openalex.org/W2062164080","https://openalex.org/W2115086266","https://openalex.org/W2130169233","https://openalex.org/W2134659216","https://openalex.org/W2159391028","https://openalex.org/W2280141299","https://openalex.org/W2322219713","https://openalex.org/W2407080277","https://openalex.org/W2755891984","https://openalex.org/W2936774411","https://openalex.org/W2972359262","https://openalex.org/W2972981900","https://openalex.org/W3097375352","https://openalex.org/W3104939451","https://openalex.org/W3150122400","https://openalex.org/W3160557590","https://openalex.org/W3161324588","https://openalex.org/W3163169798","https://openalex.org/W3197451691","https://openalex.org/W3199610983","https://openalex.org/W3203388655","https://openalex.org/W4319790622","https://openalex.org/W4386585104","https://openalex.org/W4392100823","https://openalex.org/W4403982368","https://openalex.org/W4404102533","https://openalex.org/W4404262975","https://openalex.org/W4408354278"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"end-to-end":[1],"automatic":[2],"speech":[3],"recognition":[4,14],"(ASR)":[5],"models":[6],"like":[7],"Whisper":[8],"not":[9],"only":[10],"suffer":[11],"from":[12],"reduced":[13],"accuracy":[15],"in":[16,35,100],"noise,":[17],"but":[18],"also":[19],"exhibit":[20],"overconfidence\u2014assigning":[21],"high":[22],"confidence":[23,62,132],"to":[24,84,107],"wrong":[25],"predictions.":[26],"We":[27],"conduct":[28],"a":[29,70],"systematic":[30],"analysis":[31],"of":[32,57],"Whisper\u2019s":[33],"behavior":[34],"additive":[36],"noise":[37,136],"conditions":[38],"and":[39,79,122],"find":[40],"that":[41,75],"overconfident":[42],"errors":[43],"increase":[44],"dramatically":[45],"at":[46],"low":[47,102],"signal-to-noise":[48,103],"ratios,":[49],"with":[50,61],"$\\mathbf{1":[51],"0":[52,55],"-":[53],"2":[54],"\\%}$":[56],"tokens":[58],"incorrectly":[59],"predicted":[60],"above":[63],"0.7.":[64],"To":[65],"mitigate":[66],"this,":[67],"we":[68],"propose":[69],"lightweight,":[71],"post-hoc":[72],"calibration":[73,117],"framework":[74],"detects":[76],"potential":[77],"overconfidence":[78],"applies":[80],"temperature":[81],"scaling":[82],"selectively":[83],"those":[85],"tokens,":[86],"without":[87],"altering":[88],"the":[89,95,101,115,124],"underlying":[90],"ASR":[91],"model.":[92],"Evaluations":[93],"on":[94],"R-SPIN":[96],"dataset":[97],"demonstrate":[98],"that,":[99],"ratio":[104],"range":[105],"(-18":[106],"$-5":[108],"\\mathbf{~":[109],"d":[110],"B}$),":[111],"our":[112],"method":[113],"reduces":[114],"expected":[116],"error":[118],"(ECE)":[119],"by":[120],"58%":[121],"triples":[123],"normalized":[125],"cross":[126],"entropy":[127],"(NCE),":[128],"yielding":[129],"more":[130],"reliable":[131],"estimates":[133],"under":[134],"severe":[135],"conditions.":[137]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-04-03T00:00:00"}
