{"id":"https://openalex.org/W7127135631","doi":"https://doi.org/10.48550/arxiv.2601.23049","title":"MedMCP-Calc: Benchmarking LLMs for Realistic Medical Calculator Scenarios via MCP Integration","display_name":"MedMCP-Calc: Benchmarking LLMs for Realistic Medical Calculator Scenarios via MCP Integration","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7127135631","doi":"https://doi.org/10.48550/arxiv.2601.23049"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.23049","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124844740","display_name":"Yakun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhu, Yakun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122737003","display_name":"Yutong Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yutong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124870852","display_name":"Shengqian Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Shengqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017431694","display_name":"Zhongzhen Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zhongzhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124819614","display_name":"Shaoting Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shaoting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124829002","display_name":"Xiaofan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaofan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124844740"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.6956999897956848,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.6956999897956848,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.06279999762773514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.026200000196695328,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/calculator","display_name":"Calculator","score":0.7901999950408936},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7470999956130981},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6402999758720398},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5976999998092651},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5282999873161316},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.48350000381469727},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.44769999384880066},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.4302000105381012}],"concepts":[{"id":"https://openalex.org/C2776836400","wikidata":"https://www.wikidata.org/wiki/Q31087","display_name":"Calculator","level":2,"score":0.7901999950408936},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7470999956130981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6733999848365784},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6402999758720398},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5976999998092651},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5282999873161316},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48350000381469727},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.44769999384880066},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.335099995136261},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3278999924659729},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3237999975681305},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.28690001368522644},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2766999900341034},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2662000060081482},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2581000030040741}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.23049","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.23049","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.23049","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.23049","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Medical":[0],"calculators":[1,114],"are":[2,170],"fundamental":[3],"to":[4,131],"quantitative,":[5],"evidence-based":[6],"clinical":[7,72,143],"practice.":[8],"However,":[9],"their":[10],"real-world":[11],"use":[12],"is":[13],"an":[14],"adaptive,":[15],"multi-stage":[16],"process,":[17],"requiring":[18],"proactive":[19],"EHR":[20,82],"data":[21],"acquisition,":[22],"scenario-dependent":[23],"calculator":[24,57],"selection,":[25],"and":[26,88,128,158,168],"multi-step":[27],"computation,":[28],"whereas":[29],"current":[30],"benchmarks":[31],"focus":[32],"only":[33],"on":[34,146],"static":[35],"single-step":[36],"calculations":[37],"with":[38],"explicit":[39],"instructions.":[40],"To":[41],"address":[42],"these":[43,147],"limitations,":[44],"we":[45,149],"introduce":[46],"MedMCP-Calc,":[47],"the":[48],"first":[49],"benchmark":[50],"for":[51,115,135],"evaluating":[52],"LLMs":[53],"in":[54,123,172],"realistic":[55],"medical":[56],"scenarios":[58],"through":[59],"Model":[60],"Context":[61],"Protocol":[62],"(MCP)":[63],"integration.":[64],"MedMCP-Calc":[65],"comprises":[66],"118":[67],"scenario":[68,156],"tasks":[69],"across":[70,142],"4":[71],"domains,":[73],"featuring":[74],"fuzzy":[75,119],"task":[76],"descriptions":[77],"mimicking":[78],"natural":[79],"queries,":[80,120],"structured":[81],"database":[83,126],"interaction,":[84],"external":[85,133],"reference":[86],"retrieval,":[87],"process-level":[89],"evaluation.":[90],"Our":[91],"evaluation":[92],"of":[93],"23":[94],"leading":[95],"models":[96],"reveals":[97],"critical":[98],"limitations:":[99],"even":[100],"top":[101],"performers":[102],"like":[103],"Claude":[104],"Opus":[105],"4.5":[106],"exhibit":[107],"substantial":[108],"gaps,":[109],"including":[110],"difficulty":[111],"selecting":[112],"appropriate":[113],"end-to-end":[116],"workflows":[117],"given":[118],"poor":[121],"performance":[122,163],"iterative":[124],"SQL-based":[125],"interactions,":[127],"marked":[129],"reluctance":[130],"leverage":[132],"tools":[134],"numerical":[136],"computation.":[137],"Performance":[138],"also":[139],"varies":[140],"considerably":[141],"domains.":[144],"Building":[145],"findings,":[148],"develop":[150],"CalcMate,":[151],"a":[152],"fine-tuned":[153],"model":[154],"incorporating":[155],"planning":[157],"tool":[159],"augmentation,":[160],"achieving":[161],"state-of-the-art":[162],"among":[164],"open-source":[165],"models.":[166],"Benchmark":[167],"Codes":[169],"available":[171],"https://github.com/SPIRAL-MED/MedMCP-Calc.":[173]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-03T00:00:00"}
