{"id":"https://openalex.org/W7162460701","doi":"https://doi.org/10.48550/arxiv.2605.25988","title":"What Makes a Medical Checker Trainable? Diagnosing Signal Collapse and Reward Hacking in Checker-Guided RAG for Biomedical QA","display_name":"What Makes a Medical Checker Trainable? Diagnosing Signal Collapse and Reward Hacking in Checker-Guided RAG for Biomedical QA","publication_year":2026,"publication_date":"2026-05-25","ids":{"openalex":"https://openalex.org/W7162460701","doi":"https://doi.org/10.48550/arxiv.2605.25988"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.25988","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25988","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.25988","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026482925","display_name":"Yuelyu Ji","orcid":"https://orcid.org/0000-0001-6389-5823"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Yuelyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045454868","display_name":"Min Gu Kwak","orcid":"https://orcid.org/0000-0003-0649-9909"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwak, Min Gu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137035821","display_name":"Hang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137046125","display_name":"Xizhi Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Xizhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136998435","display_name":"Chenyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chenyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137017607","display_name":"Yanshan Wan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wan, Yanshan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.18619999289512634,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.18619999289512634,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1551000028848648,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.13619999587535858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.6431000232696533},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6202999949455261},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5181000232696533},{"id":"https://openalex.org/keywords/hacker","display_name":"Hacker","score":0.4104999899864197},{"id":"https://openalex.org/keywords/model-checking","display_name":"Model checking","score":0.3944999873638153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7371000051498413},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.6431000232696533},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6202999949455261},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5181000232696533},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47909998893737793},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43790000677108765},{"id":"https://openalex.org/C86844869","wikidata":"https://www.wikidata.org/wiki/Q2798820","display_name":"Hacker","level":2,"score":0.4104999899864197},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.3944999873638153},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.25988","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25988","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.25988","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25988","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.750808596611023}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Medical":[0],"RAG":[1,48],"needs":[2],"evidence-grounded":[3],"claims,":[4],"so":[5,124],"plugging":[6],"a":[7,45,89,107,112,125,130],"claim-level":[8],"NLI":[9,38],"checker":[10,39,110,147],"into":[11],"retrieval-augmented":[12],"RL":[13,83],"is":[14,69,143],"intuitive.":[15],"\\textbf{We":[16],"find":[17],"that":[18],"the":[19,82,94,145,160],"checker's":[20],"\\emph{output":[21],"distribution}":[22],"during":[23],"training,":[24],"not":[25],"its":[26],"held-out":[27,58],"accuracy,":[28],"decides":[29],"whether":[30],"it":[31],"provides":[32],"trainable":[33],"gradient.}":[34],"We":[35,163],"compare":[36],"four":[37,57],"back-ends":[40],"as":[41,149,166],"process":[42],"rewards":[43],"inside":[44],"GRPO-trained":[46],"medical":[47,59],"agent":[49],"(Qwen2.5-7B,":[50],"replicated":[51],"on":[52,104,151,156],"Qwen3-4B":[53],"and":[54],"Llama-3.1-8B)":[55],"across":[56],"QA":[60],"benchmarks.":[61],"Three":[62],"diagnostic":[63],"findings":[64],"emerge.":[65],"\\textbf{(i)}":[66],"Signal":[67,141],"collapse":[68,122],"log-prob-specific:":[70],"LLM":[71],"log-probability":[72],"scoring":[73],"labels":[74],"over":[75,135],"97\\%":[76],"of":[77],"claims":[78],"neutral":[79],"--":[80,87,116,123],"collapsing":[81],"gradient":[84],"to":[85],"zero":[86],"while":[88],"calibrated":[90],"MedNLI":[91],"classifier":[92,128],"scores":[93],"same":[95,146],"pairs":[96],"non-degenerately.":[97],"\\textbf{(ii)}":[98],"Moderate":[99],"signal":[100,103],"beats":[101],"strong":[102,108,155],"answer":[105],"quality:":[106],"proprietary":[109],"triggers":[111],"three-step":[113],"reward-hacking":[114],"cascade":[115,161],"ultra-short":[117],"answers,":[118],"search":[119],"avoidance,":[120],"language":[121],"moderate-signal":[126],"local":[127],"trains":[129],"higher-quality":[131],"model":[132],"(\\textbf{+12\\%":[133],"BERTScore":[134],"zero-shot,":[136],"no":[137],"GPT":[138],"dependency}).":[139],"\\textbf{(iii)}":[140],"strength":[142],"policy-dependent:":[144],"registers":[148],"moderate":[150],"one":[152],"policy":[153],"but":[154],"another":[157],"without":[158],"triggering":[159],"end-state.":[162],"frame":[164],"these":[165],"boundary":[167],"conditions":[168],"for":[169],"verifier-as-reward":[170],"systems.":[171]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-27T00:00:00"}
