{"id":"https://openalex.org/W4416017990","doi":"https://doi.org/10.1145/3746252.3760887","title":"Uncertainty Quantification for Multiple-Choice Questions is Just One-Token Deep","display_name":"Uncertainty Quantification for Multiple-Choice Questions is Just One-Token Deep","publication_year":2025,"publication_date":"2025-11-08","ids":{"openalex":"https://openalex.org/W4416017990","doi":"https://doi.org/10.1145/3746252.3760887"},"language":null,"primary_location":{"id":"doi:10.1145/3746252.3760887","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3760887","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746252.3760887","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023848002","display_name":"Qingcheng Zeng","orcid":"https://orcid.org/0000-0002-8697-2729"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qingcheng Zeng","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA"],"raw_orcid":"https://orcid.org/0000-0002-8697-2729","affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mingyu Jin","orcid":"https://orcid.org/0009-0007-6990-7355"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingyu Jin","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0009-0007-6990-7355","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qinkai Yu","orcid":"https://orcid.org/0009-0000-6526-0928"},"institutions":[{"id":"https://openalex.org/I23923803","display_name":"University of Exeter","ror":"https://ror.org/03yghzc09","country_code":"GB","type":"education","lineage":["https://openalex.org/I23923803"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Qinkai Yu","raw_affiliation_strings":["University of Exeter, Exeter, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0000-6526-0928","affiliations":[{"raw_affiliation_string":"University of Exeter, Exeter, United Kingdom","institution_ids":["https://openalex.org/I23923803"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005894987","display_name":"Z.G. Wang","orcid":"https://orcid.org/0000-0003-0311-1331"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenting Wang","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0003-0311-1331","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042013742","display_name":"Wenyue Hua","orcid":"https://orcid.org/0009-0008-2043-2704"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenyue Hua","raw_affiliation_strings":["University of California, Santa Barbara, Santa Barbara, CA, USA"],"raw_orcid":"https://orcid.org/0009-0008-2043-2704","affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara, Santa Barbara, CA, USA","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073806630","display_name":"Guangyan Sun","orcid":"https://orcid.org/0009-0001-4082-5248"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangyan Sun","raw_affiliation_strings":["University of Rochester, Rochester, NY, USA"],"raw_orcid":"https://orcid.org/0009-0001-4082-5248","affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060523641","display_name":"Yanda Meng","orcid":"https://orcid.org/0000-0001-7344-2174"},"institutions":[{"id":"https://openalex.org/I23923803","display_name":"University of Exeter","ror":"https://ror.org/03yghzc09","country_code":"GB","type":"education","lineage":["https://openalex.org/I23923803"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yanda Meng","raw_affiliation_strings":["University of Exeter, Exeter, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-7344-2174","affiliations":[{"raw_affiliation_string":"University of Exeter, Exeter, United Kingdom","institution_ids":["https://openalex.org/I23923803"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101594068","display_name":"Shiqing Ma","orcid":"https://orcid.org/0000-0003-1551-8948"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiqing Ma","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, MA, USA"],"raw_orcid":"https://orcid.org/0000-0003-1551-8948","affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101774659","display_name":"Qifan Wang","orcid":"https://orcid.org/0000-0002-7570-5756"},"institutions":[{"id":"https://openalex.org/I3197470489","display_name":"Alpha Omega Alpha Medical Honor Society","ror":"https://ror.org/057q9nn35","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I3197470489"]},{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qifan Wang","raw_affiliation_strings":["Meta AI, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-7570-5756","affiliations":[{"raw_affiliation_string":"Meta AI, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I3197470489","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034751153","display_name":"Felix Juefei-Xu","orcid":"https://orcid.org/0000-0002-0857-8611"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Felix Juefei-Xu","raw_affiliation_strings":["Meta AI, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-0857-8611","affiliations":[{"raw_affiliation_string":"Meta AI, New York, NY, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101790532","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0003-3442-754X"},"institutions":[{"id":"https://openalex.org/I47251452","display_name":"Wake Forest University","ror":"https://ror.org/0207ad724","country_code":"US","type":"education","lineage":["https://openalex.org/I47251452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Wake Forest University, Winston-Salem, NC, USA"],"raw_orcid":"https://orcid.org/0000-0003-3442-754X","affiliations":[{"raw_affiliation_string":"Wake Forest University, Winston-Salem, NC, USA","institution_ids":["https://openalex.org/I47251452"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044455276","display_name":"Kaize Ding","orcid":"https://orcid.org/0000-0001-6684-6752"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaize Ding","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA"],"raw_orcid":"https://orcid.org/0000-0001-6684-6752","affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101660251","display_name":"Ruixiang Tang","orcid":"https://orcid.org/0000-0001-6476-2336"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruixiang Tang","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0001-6476-2336","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329828","display_name":"Yongfeng Zhang","orcid":"https://orcid.org/0000-0003-2633-8555"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongfeng Zhang","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0003-2633-8555","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5023848002"],"corresponding_institution_ids":["https://openalex.org/I111979921"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16793765,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5474","last_page":"5478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7107999920845032,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7107999920845032,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.05719999969005585,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.0406000018119812,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/uncertainty-quantification","display_name":"Uncertainty quantification","score":0.7156999707221985},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5993000268936157},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5846999883651733},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5672000050544739},{"id":"https://openalex.org/keywords/measurement-uncertainty","display_name":"Measurement uncertainty","score":0.3869999945163727},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.37929999828338623},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.3587000072002411},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.34040001034736633}],"concepts":[{"id":"https://openalex.org/C32230216","wikidata":"https://www.wikidata.org/wiki/Q7882499","display_name":"Uncertainty quantification","level":2,"score":0.7156999707221985},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.65829998254776},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5993000268936157},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5846999883651733},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5672000050544739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4523000121116638},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.391400009393692},{"id":"https://openalex.org/C137209882","wikidata":"https://www.wikidata.org/wiki/Q1403517","display_name":"Measurement uncertainty","level":2,"score":0.3869999945163727},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C177803969","wikidata":"https://www.wikidata.org/wiki/Q29205","display_name":"Uncertainty analysis","level":2,"score":0.3352999985218048},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C100253034","wikidata":"https://www.wikidata.org/wiki/Q196372","display_name":"Systematic error","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746252.3760887","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3760887","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746252.3760887","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3760887","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2606974598","https://openalex.org/W2946609015","https://openalex.org/W2970062726","https://openalex.org/W2970780738","https://openalex.org/W3162922479","https://openalex.org/W4389518686","https://openalex.org/W4389518784","https://openalex.org/W4402669926","https://openalex.org/W4402670143"],"related_works":[],"abstract_inverted_index":{"Multiple-choice":[0],"question":[1,145],"(MCQ)":[2],"benchmarks":[3],"such":[4,36],"as":[5,37,44],"MMLU":[6],"and":[7,41,50,116,142,185,198],"GPQA":[8],"are":[9,64,169,186,213],"widely":[10],"used":[11],"to":[12,61,78,97,201,216],"assess":[13],"the":[14,24,80,83,88,93],"capabilities":[15],"of":[16,47,82,110,183],"large":[17],"language":[18],"models":[19,151],"(LLMs).":[20],"While":[21],"accuracy":[22,122,154],"remains":[23],"standard":[25,137],"evaluation":[26],"metric,":[27],"recent":[28],"work":[29],"has":[30],"introduced":[31],"uncertainty":[32,202],"quantification":[33],"(UQ)":[34],"methods,":[35,58],"entropy,":[38],"conformal":[39],"prediction,":[40],"verbalized":[42],"confidence,":[43],"complementary":[45],"measures":[46],"model":[48,73,94],"reliability":[49],"calibration.":[51,159],"However,":[52],"we":[53,68],"find":[54],"that":[55,70,163],"these":[56],"UQ":[57,111,165],"when":[59],"applied":[60],"MCQ":[62],"tasks,":[63],"unexpectedly":[65],"fragile.":[66],"Specifically,":[67],"show":[69],"fine-tuning":[71],"a":[72,100,107,143],"on":[74,131],"just":[75],"1,000":[76],"examples":[77],"adjust":[79],"probability":[81],"first":[84],"generated":[85],"token,":[86],"under":[87,136],"common":[89],"prompting":[90],"setup":[91],"where":[92,210],"is":[95],"instructed":[96],"output":[98],"only":[99],"single":[101],"answer":[102,121],"choice,":[103],"can":[104],"systematically":[105],"distort":[106],"broad":[108],"range":[109],"methods":[112],"across":[113],"models,":[114],"prompts,":[115],"domains,":[117],"all":[118,149],"while":[119],"leaving":[120],"unchanged.":[123],"We":[124],"validate":[125],"this":[126],"phenomenon":[127],"through":[128,189],"extensive":[129],"experiments":[130],"five":[132],"instruction-tuned":[133],"LLMs,":[134],"tested":[135],"prompting,":[138],"zero-shot":[139],"chain-of-thought":[140],"reasoning,":[141],"biomedical":[144],"answering":[146],"setting.":[147],"In":[148],"cases,":[150],"retain":[152],"similar":[153],"but":[155],"exhibit":[156],"significantly":[157],"degraded":[158],"These":[160],"results":[161],"suggest":[162],"current":[164],"practices":[166],"for":[167,195],"MCQs":[168],"''one-token":[170],"deep'',":[171],"driven":[172],"more":[173,196],"by":[174,179],"first-token":[175],"decoding":[176],"behavior":[177],"than":[178],"any":[180],"deeper":[181],"representation":[182],"uncertainty,":[184],"easily":[187],"manipulated":[188],"minimal":[190],"interventions.":[191],"Our":[192],"findings":[193],"call":[194],"robust":[197],"interpretable":[199],"approaches":[200],"estimation,":[203],"particularly":[204],"in":[205],"structured":[206],"formats":[207],"like":[208],"MCQs,":[209],"confidence":[211],"signals":[212],"often":[214],"reduced":[215],"token-level":[217],"heuristics.":[218]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-08T00:00:00"}
