{"id":"https://openalex.org/W7136634448","doi":"https://doi.org/10.48550/arxiv.2603.12826","title":"Rethinking Multiple-Choice Questions for RLVR: Unlocking Potential via Distractor Design","display_name":"Rethinking Multiple-Choice Questions for RLVR: Unlocking Potential via Distractor Design","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136634448","doi":"https://doi.org/10.48550/arxiv.2603.12826"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12826","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12826","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12826","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129561658","display_name":"Xu Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guo, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109786310","display_name":"Qiming Ge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Qiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025411088","display_name":"Jian Tong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129591910","display_name":"Kedi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kedi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129451174","display_name":"Jin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129623084","display_name":"Xiaogui Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiaogui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129512975","display_name":"Xuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Xuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129563214","display_name":"Haijun Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Haijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129500244","display_name":"Zhihui Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Zhihui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129628708","display_name":"Yicheng Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Yicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129560957","display_name":"Qipeng Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Qipeng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5129561658"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7960000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7960000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02459999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.01850000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.6707000136375427},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5561000108718872},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5187000036239624},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5133000016212463},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5031999945640564},{"id":"https://openalex.org/keywords/conjunction","display_name":"Conjunction (astronomy)","score":0.45350000262260437},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.3521000146865845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7318999767303467},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.6707000136375427},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5561000108718872},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5187000036239624},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5133000016212463},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5031999945640564},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.45350000262260437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4408999979496002},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40549999475479126},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3555000126361847},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.28760001063346863},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2646999955177307},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12826","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12826","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12826","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12826","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"with":[2,106],"Verifiable":[3],"Rewards":[4],"(RLVR)":[5],"significantly":[6],"enhances":[7,143],"the":[8,57,70,155],"reasoning":[9,37],"capabilities":[10],"of":[11,26,72],"Large":[12],"Language":[13],"Models.":[14],"When":[15],"applied":[16],"to":[17,52,126,154],"RLVR,":[18],"Multiple-Choice":[19],"Questions":[20],"(MCQs)":[21],"offer":[22],"a":[23,119],"scalable":[24],"source":[25],"verifiable":[27],"data":[28],"but":[29],"risk":[30],"inducing":[31],"reward":[32],"hacking,":[33],"where":[34],"models":[35],"shortcut":[36],"via":[38],"random":[39,99],"guessing":[40],"or":[41],"simple":[42],"elimination.":[43],"Current":[44],"approaches":[45],"often":[46],"mitigate":[47,98],"this":[48,65],"by":[49,61,110],"converting":[50],"MCQs":[51],"open-ended":[53],"formats,":[54],"thereby":[55],"discarding":[56],"contrastive":[58],"signal":[59],"provided":[60],"expert-designed":[62],"distractors.":[63],"In":[64],"work,":[66],"we":[67,113],"systematically":[68],"investigate":[69],"impact":[71],"option":[73,86],"design":[74],"on":[75,135],"RLVR.":[76],"Our":[77],"analysis":[78],"highlights":[79],"two":[80],"primary":[81],"insights:":[82],"(1)":[83],"Mismatches":[84],"in":[85,150],"counts":[87],"between":[88],"training":[89,104,152],"and":[90,130,146],"testing":[91],"degrade":[92],"performance.":[93],"(2)":[94],"Strong":[95],"distractors":[96,125],"effectively":[97,142],"guessing,":[100],"enabling":[101],"effective":[102],"RLVR":[103,151],"even":[105],"2-way":[107],"questions.":[108],"Motivated":[109],"these":[111],"findings,":[112],"propose":[114],"Iterative":[115],"Distractor":[116],"Curation":[117],"(IDC),":[118],"framework":[120],"that":[121,139],"actively":[122],"constructs":[123],"high-quality":[124],"block":[127],"elimination":[128],"shortcuts":[129],"promote":[131],"deep":[132],"reasoning.":[133],"Experiments":[134],"various":[136],"benchmarks":[137],"demonstrate":[138],"our":[140],"method":[141],"distractor":[144],"quality":[145],"yields":[147],"significant":[148],"gains":[149],"compared":[153],"original":[156],"data.":[157]},"counts_by_year":[],"updated_date":"2026-03-17T07:05:13.627479","created_date":"2026-03-17T00:00:00"}
