{"id":"https://openalex.org/W6910560910","doi":"https://doi.org/10.48550/arxiv.2503.13508","title":"It is Too Many Options: Pitfalls of Multiple-Choice Questions in Generative AI and Medical Education","display_name":"It is Too Many Options: Pitfalls of Multiple-Choice Questions in Generative AI and Medical Education","publication_year":2025,"publication_date":"2025-03-13","ids":{"openalex":"https://openalex.org/W6910560910","doi":"https://doi.org/10.48550/arxiv.2503.13508"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2503.13508","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.13508","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2503.13508","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Singh, Shrutika","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Singh, Shrutika","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Alyakin, Anton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alyakin, Anton","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Alber, Daniel Alexander","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alber, Daniel Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Stryker, Jaden","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stryker, Jaden","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tong, Ai Phuong S","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong, Ai Phuong S","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sangwon, Karl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sangwon, Karl","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Goff, Nicolas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goff, Nicolas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"de la Paz, Mathew","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de la Paz, Mathew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hernandez-Rovira, Miguel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hernandez-Rovira, Miguel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Park, Ki Yun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Ki Yun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Leuthardt, Eric Claude","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leuthardt, Eric Claude","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Oermann, Eric Karl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oermann, Eric Karl","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.534500002861023,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.534500002861023,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1225999966263771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.08079999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.6345999836921692},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5620999932289124},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4505999982357025},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43140000104904175},{"id":"https://openalex.org/keywords/educational-measurement","display_name":"Educational measurement","score":0.365200012922287},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.3140999972820282}],"concepts":[{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.6345999836921692},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5620999932289124},{"id":"https://openalex.org/C509550671","wikidata":"https://www.wikidata.org/wiki/Q126945","display_name":"Medical education","level":1,"score":0.47269999980926514},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4505999982357025},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43140000104904175},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4162999987602234},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4068000018596649},{"id":"https://openalex.org/C22156102","wikidata":"https://www.wikidata.org/wiki/Q5341294","display_name":"Educational measurement","level":3,"score":0.365200012922287},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C2779231881","wikidata":"https://www.wikidata.org/wiki/Q5977147","display_name":"Medical literature","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C2993838110","wikidata":"https://www.wikidata.org/wiki/Q494230","display_name":"Medical school","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C2983241795","wikidata":"https://www.wikidata.org/wiki/Q6806500","display_name":"Medical decision making","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.26019999384880066},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2503.13508","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.13508","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2503.13508","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.13508","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8862369060516357}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"performance":[1,25,81,99,133,158],"of":[2,17,53,78,101,107,122,150,175],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"on":[7,26,82,111],"multiple-choice":[8,87,132],"question":[9,124],"(MCQ)":[10],"benchmarks":[11,170],"is":[12],"frequently":[13],"cited":[14],"as":[15],"proof":[16],"their":[18],"medical":[19,27,39,168],"capabilities.":[20,44],"We":[21],"hypothesized":[22],"that":[23],"LLM":[24,131,145],"MCQs":[28,58],"may":[29],"in":[30,80,167,177],"part":[31],"be":[32],"illusory":[33],"and":[34,42,70,72,187],"driven":[35],"by":[36],"factors":[37],"beyond":[38],"content":[40],"knowledge":[41],"reasoning":[43],"To":[45,103],"assess":[46],"this,":[47],"we":[48,63,113],"created":[49],"a":[50,115],"novel":[51],"benchmark":[52],"free-response":[54,83,157,192],"questions":[55,84],"with":[56,143],"paired":[57],"(FreeMedQA).":[59],"Using":[60],"this":[61],"benchmark,":[62],"evaluated":[64],"three":[65],"state-of-the-art":[66],"LLMs":[67,155,176],"(GPT-4o,":[68],"GPT-3.5,":[69],"LLama-3-70B-instruct)":[71],"found":[73],"an":[74,148],"average":[75,130],"absolute":[76],"deterioration":[77],"39.43%":[79],"relative":[85],"to":[86],"(p":[88,140],"=":[89,141],"1.3":[90],"*":[91],"10-5)":[92],"which":[93],"was":[94,134,159],"greater":[95,136],"than":[96,137],"the":[97,105,108,123,129,156,165,173,181],"human":[98,186],"decline":[100],"22.29%.":[102],"isolate":[104],"role":[106],"MCQ":[109,169],"format":[110],"performance,":[112],"performed":[114],"masking":[116,119],"study,":[117],"iteratively":[118],"out":[120],"parts":[121],"stem.":[125],"At":[126],"100%":[127],"masking,":[128],"6.70%":[135],"random":[138],"chance":[139],"0.002)":[142],"one":[144],"(GPT-4o)":[146],"obtaining":[147],"accuracy":[149],"37.34%.":[151],"Notably,":[152],"for":[153,171,183],"all":[154],"near":[160],"zero.":[161],"Our":[162],"results":[163],"highlight":[164],"shortcomings":[166],"overestimating":[172],"capabilities":[174],"medicine,":[178],"and,":[179],"broadly,":[180],"potential":[182],"improving":[184],"both":[185],"machine":[188],"assessments":[189],"using":[190],"LLM-evaluated":[191],"questions.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
