{"id":"https://openalex.org/W7161234516","doi":"https://doi.org/10.48550/arxiv.2605.14126","title":"Reinforcement Learning for Tool-Calling Agents in Fast Healthcare Interoperability Resources (FHIR)","display_name":"Reinforcement Learning for Tool-Calling Agents in Fast Healthcare Interoperability Resources (FHIR)","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161234516","doi":"https://doi.org/10.48550/arxiv.2605.14126"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.14126","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14126","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.14126","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016959376","display_name":"Marius S. Knorr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Knorr, Marius S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136209149","display_name":"Robert M\u00fcller","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u00fcller, Robert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048234823","display_name":"Jan P. Bremer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bremer, Jan P.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136241961","display_name":"Nils Schweingruber","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schweingruber, Nils","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8481000065803528,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8481000065803528,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05079999938607216,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.024399999529123306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.7936999797821045},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6585000157356262},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6137999892234802},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.48500001430511475},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.43689998984336853},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.43389999866485596},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.43309998512268066},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.39910000562667847}],"concepts":[{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.7936999797821045},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7839999794960022},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6585000157356262},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6137999892234802},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48500001430511475},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.43689998984336853},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.43389999866485596},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.43309998512268066},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41600000858306885},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4104999899864197},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.39910000562667847},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.34769999980926514},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C2775851571","wikidata":"https://www.wikidata.org/wiki/Q6045205","display_name":"Interaction protocol","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2824000120162964},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26249998807907104},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.25220000743865967},{"id":"https://openalex.org/C5894958","wikidata":"https://www.wikidata.org/wiki/Q2297769","display_name":"Software agent","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.14126","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14126","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.14126","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14126","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.409121036529541,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fast":[0],"Healthcare":[1],"Interoperability":[2],"Resources":[3],"(FHIR)":[4],"is":[5],"the":[6,60,72],"dominant":[7],"standard":[8],"for":[9,78],"interoperable":[10],"exchange":[11],"of":[12,24,74],"healthcare":[13],"data.":[14],"In":[15],"FHIR,":[16],"electronic":[17],"health":[18],"records":[19],"form":[20],"a":[21,76,92,97,103,114,152],"directed":[22],"graph":[23],"resources.":[25],"Answering":[26],"clinically":[27],"meaningful":[28],"questions":[29],"over":[30,82,96,178],"FHIR":[31,90],"requires":[32],"agents":[33,52],"to":[34,126,147],"perform":[35],"multi-step":[36],"reasoning,":[37],"filtering,":[38],"and":[39,86,107,117,154,170],"aggregation":[40],"across":[41],"multiple":[42],"resource":[43],"types.":[44],"Prior":[45],"work":[46],"shows":[47],"that":[48,173],"even":[49],"tool-augmented":[50],"LLM":[51,120],"(retrieval,":[53],"code":[54],"execution,":[55],"multi-turn":[56,104,176],"planning)":[57],"often":[58],"select":[59],"wrong":[61],"resources":[62],"or":[63],"violate":[64],"traversal":[65],"constraints.":[66,137],"We":[67,101,158],"study":[68],"this":[69],"problem":[70,95],"in":[71],"context":[73],"FHIR-AgentBench,":[75],"benchmark":[77],"realistic":[79],"question":[80],"answering":[81],"real-world":[83],"hospital":[84],"data,":[85],"frame":[87],"reasoning":[88,177],"on":[89,149],"as":[91],"sequential":[93],"decision-making":[94],"queryable":[98],"structured":[99,179],"graph.":[100],"implement":[102],"CodeAct":[105],"agent":[106],"post-train":[108],"it":[109],"with":[110],"reinforcement":[111],"learning":[112],"using":[113,151],"custom":[115,171],"harness":[116,166],"tools.":[118],"A":[119],"Judge":[121],"provides":[122],"execution-grounded":[123],"rewards.":[124],"Compared":[125],"prompt-based,":[127],"closed-model":[128],"baselines,":[129],"RL":[130],"post-training":[131,162],"improves":[132,141,175],"performance":[133],"while":[134],"enforcing":[135],"data-integrity":[136],"Empirically,":[138],"our":[139],"approach":[140],"answer":[142],"correctness":[143],"from":[144],"50%":[145],"(o4-mini)":[146],"77%":[148],"FHIR-AgentBench":[150],"smaller":[153],"cheaper":[155],"Qwen3-8B":[156],"model.":[157],"present":[159],"an":[160],"end-to-end":[161],"pipeline":[163],"(environment":[164],"building,":[165],"construction,":[167],"model":[168],"training":[169],"evaluation)":[172],"reliably":[174],"clinical":[180],"graphs.":[181]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-16T00:00:00"}
