{"id":"https://openalex.org/W7136646268","doi":"https://doi.org/10.48550/arxiv.2603.12458","title":"Shattering the Shortcut: A Topology-Regularized Benchmark for Multi-hop Medical Reasoning in LLMs","display_name":"Shattering the Shortcut: A Topology-Regularized Benchmark for Multi-hop Medical Reasoning in LLMs","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7136646268","doi":"https://doi.org/10.48550/arxiv.2603.12458"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12458","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010907570","display_name":"Xing Zi","orcid":"https://orcid.org/0009-0001-4265-2205"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zi, Xing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129544213","display_name":"Xinying Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xinying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129465194","display_name":"Jinghao Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jinghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129580071","display_name":"Catarina Moreira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moreira, Catarina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124998100","display_name":"Mukesh Prasad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prasad, Mukesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010907570"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.48840001225471497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.48840001225471497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.27320000529289246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7275000214576721},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6912999749183655},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.6169000267982483},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4652999937534332},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4553000032901764},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.40139999985694885},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.3978999853134155},{"id":"https://openalex.org/keywords/medical-diagnosis","display_name":"Medical diagnosis","score":0.35569998621940613}],"concepts":[{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7275000214576721},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6912999749183655},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6904000043869019},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.6169000267982483},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.475600004196167},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4652999937534332},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.40139999985694885},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3711000084877014},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.35569998621940613},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3529999852180481},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.2989000082015991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2921000123023987},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2775999903678894},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5668220520019531}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"achieve":[5],"expert-level":[6],"performance":[7,133,154],"on":[8,123,135],"standard":[9],"medical":[10,80,172],"benchmarks":[11],"through":[12],"single-hop":[13],"factual":[14],"recall,":[15],"they":[16],"severely":[17],"struggle":[18],"with":[19],"the":[20,100,145,166,175],"complex,":[21],"multi-hop":[22,65,137],"diagnostic":[23,73],"reasoning":[24,168],"required":[25],"in":[26,46,164],"real-world":[27],"clinical":[28,66],"settings.":[29],"A":[30],"primary":[31],"obstacle":[32],"is":[33],"\"shortcut":[34],"learning\",":[35],"where":[36],"models":[37,115],"exploit":[38],"highly":[39],"connected,":[40],"generic":[41,91],"hub":[42],"nodes":[43],"(e.g.,":[44],"\"inflammation\")":[45],"knowledge":[47],"graphs":[48],"to":[49,69,93,116],"bypass":[50],"authentic":[51],"micro-pathological":[52],"cascades.":[53],"To":[54],"address":[55],"this,":[56],"we":[57],"introduce":[58],"ShatterMed-QA,":[59],"a":[60,78,84],"bilingual":[61],"benchmark":[62],"of":[63,128,170],"10,558":[64],"questions":[67],"designed":[68],"rigorously":[70],"evaluate":[71],"deep":[72],"reasoning.":[74],"Our":[75],"framework":[76],"constructs":[77],"topology-regularized":[79],"Knowledge":[81],"Graph":[82],"using":[83],"novel":[85],"$k$-Shattering":[86],"algorithm,":[87],"which":[88],"physically":[89],"prunes":[90],"hubs":[92],"explicitly":[94],"sever":[95],"logical":[96],"shortcuts.":[97],"We":[98],"synthesize":[99],"evaluation":[101],"vignettes":[102],"by":[103],"applying":[104],"implicit":[105],"bridge":[106],"entity":[107],"masking":[108],"and":[109,160,179],"topology-driven":[110],"hard":[111],"negative":[112],"sampling,":[113],"forcing":[114],"navigate":[117],"biologically":[118],"plausible":[119],"distractors":[120],"without":[121],"relying":[122],"superficial":[124],"elimination.":[125],"Comprehensive":[126],"evaluations":[127],"21":[129],"LLMs":[130],"reveal":[131],"massive":[132],"degradation":[134],"our":[136,183],"tasks,":[138],"particularly":[139],"among":[140],"domain-specific":[141],"models.":[142],"Crucially,":[143],"restoring":[144],"masked":[146],"evidence":[147],"via":[148],"Retrieval-Augmented":[149],"Generation":[150],"(RAG)":[151],"triggers":[152],"near-universal":[153],"recovery,":[155],"validating":[156],"ShatterMed-QA's":[157],"structural":[158],"fidelity":[159],"proving":[161],"its":[162],"efficacy":[163],"diagnosing":[165],"fundamental":[167],"deficits":[169],"current":[171],"AI.":[173],"Explore":[174],"dataset,":[176],"interactive":[177],"examples,":[178],"full":[180],"leaderboards":[181],"at":[182],"project":[184],"website:":[185],"https://shattermed-qa-web.vercel.app/":[186]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-17T00:00:00"}
