{"id":"https://openalex.org/W7140325451","doi":"https://doi.org/10.48550/arxiv.2603.22879","title":"Confidence Calibration under Ambiguous Ground Truth","display_name":"Confidence Calibration under Ambiguous Ground Truth","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140325451","doi":"https://doi.org/10.48550/arxiv.2603.22879"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.22879","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22879","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.22879","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084075159","display_name":"Linwei Tao","orcid":"https://orcid.org/0000-0002-8848-0189"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Linwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Luo, Haoyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Haoyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130597009","display_name":"Minjing Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Minjing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130549717","display_name":"Chang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.2671999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.2671999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.10119999945163727,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.06440000236034393,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.777400016784668},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6692000031471252},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6657000184059143},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.6047999858856201},{"id":"https://openalex.org/keywords/monotonic-function","display_name":"Monotonic function","score":0.45739999413490295},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4275999963283539},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.40849998593330383}],"concepts":[{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.777400016784668},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6692000031471252},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6657000184059143},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6610000133514404},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.6047999858856201},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4869000017642975},{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.40849998593330383},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3772999942302704},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.3750999867916107},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3560999929904938},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.30799999833106995},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30720001459121704},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2745000123977661},{"id":"https://openalex.org/C118671147","wikidata":"https://www.wikidata.org/wiki/Q578714","display_name":"Quantile","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C2776818064","wikidata":"https://www.wikidata.org/wiki/Q829903","display_name":"Agreement","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2703000009059906}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.22879","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22879","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.22879","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22879","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5162716507911682}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Confidence":[0],"calibration":[1,117,135],"assumes":[2],"a":[3,78,126],"unique":[4],"ground-truth":[5],"label":[6,92,142],"per":[7,129],"input,":[8],"yet":[9,36],"this":[10,48],"assumption":[11],"fails":[12],"wherever":[13],"annotators":[14],"genuinely":[15],"disagree.":[16],"Post-hoc":[17],"calibrators":[18,83],"fitted":[19],"on":[20,167],"majority-voted":[21],"labels,":[22],"the":[23,41,90,108,114,162],"standard":[24],"single-label":[25],"targets":[26,160],"used":[27],"in":[28],"practice,":[29],"can":[30],"appear":[31],"well-calibrated":[32],"under":[33,52],"conventional":[34],"evaluation":[35],"remain":[37],"substantially":[38],"miscalibrated":[39],"against":[40,89],"underlying":[42],"annotator":[43,63,110,202],"distribution.":[44],"We":[45],"show":[46,182],"that":[47,61,84,140,183],"failure":[49],"is":[50,57],"structural:":[51],"simplifying":[53],"assumptions,":[54],"Temperature":[55,123,148,192],"Scaling":[56,124,149],"biased":[58],"toward":[59],"temperatures":[60],"underestimate":[62],"uncertainty,":[64],"with":[65,70,125,152,170],"true-label":[66,186],"miscalibration":[67],"increasing":[68],"monotonically":[69],"annotation":[71,104,128],"entropy.":[72],"To":[73],"address":[74],"this,":[75],"we":[76],"develop":[77],"family":[79],"of":[80],"ambiguity-aware":[81],"post-hoc":[82],"optimise":[85],"proper":[86],"scoring":[87],"rules":[88],"full":[91,109],"distribution":[93,111],"and":[94,112,146,176],"require":[95],"no":[96],"model":[97],"retraining.":[98],"Our":[99],"methods":[100],"span":[101],"progressively":[102],"weaker":[103],"requirements:":[105],"Dirichlet-Soft":[106,184],"leverages":[107],"achieves":[113],"best":[115],"overall":[116],"quality":[118],"across":[119,136],"settings;":[120],"Monte":[121],"Carlo":[122],"single":[127],"example":[130],"(MCTS":[131],"S=1)":[132],"matches":[133],"full-distribution":[134],"all":[137],"benchmarks,":[138],"demonstrating":[139],"pre-aggregated":[141],"distributions":[143,173],"are":[144],"unnecessary;":[145],"Label-Smooth":[147],"(LS-TS)":[150],"operates":[151],"voted":[153],"labels":[154],"alone":[155],"by":[156,188,198],"constructing":[157],"data-driven":[158],"pseudo-soft":[159],"from":[161],"model's":[163],"own":[164],"confidence.":[165],"Experiments":[166],"four":[168],"benchmarks":[169],"real":[171],"multi-annotator":[172],"(CIFAR-10H,":[174],"ChaosNLI)":[175],"clinically-informed":[177],"synthetic":[178],"annotations":[179],"(ISIC~2019,":[180],"DermaMNIST)":[181],"reduces":[185,196],"ECE":[187,197],"55-87%":[189],"relative":[190],"to":[191],"Scaling,":[193],"while":[194],"LS-TS":[195],"9-77%":[199],"without":[200],"any":[201],"data.":[203]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-26T00:00:00"}
