{"id":"https://openalex.org/W7159669914","doi":"https://doi.org/10.48550/arxiv.2604.27233","title":"Reinforced Agent: Inference-Time Feedback for Tool-Calling Agents","display_name":"Reinforced Agent: Inference-Time Feedback for Tool-Calling Agents","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7159669914","doi":"https://doi.org/10.48550/arxiv.2604.27233"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27233","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27233","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27233","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134977949","display_name":"Anh Ta","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ta, Anh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134937141","display_name":"Junjie Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Junjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5061162179","display_name":"Shahin Shayandeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shayandeh, Shahin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5134977949"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09939999878406525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09939999878406525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.09290000051259995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08449999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stateful-firewall","display_name":"Stateful firewall","score":0.5633000135421753},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5320000052452087},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5315999984741211},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4966999888420105},{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.45260000228881836},{"id":"https://openalex.org/keywords/helpfulness","display_name":"Helpfulness","score":0.39070001244544983},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.38260000944137573},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.37689998745918274}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7982000112533569},{"id":"https://openalex.org/C22927095","wikidata":"https://www.wikidata.org/wiki/Q1784206","display_name":"Stateful firewall","level":3,"score":0.5633000135421753},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5320000052452087},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5315999984741211},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5153999924659729},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4966999888420105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46230000257492065},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.45260000228881836},{"id":"https://openalex.org/C2781265381","wikidata":"https://www.wikidata.org/wiki/Q5710255","display_name":"Helpfulness","level":2,"score":0.39070001244544983},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C186886427","wikidata":"https://www.wikidata.org/wiki/Q5441213","display_name":"Feedback loop","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C2779305910","wikidata":"https://www.wikidata.org/wiki/Q5172809","display_name":"Corrective feedback","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2660999894142151},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2565999925136566},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2533999979496002},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27233","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27233","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27233","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27233","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6760667562484741}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tool-calling":[0],"agents":[1],"are":[2,30],"evaluated":[3],"on":[4,180,190,195],"tool":[5,66],"selection,":[6],"parameter":[7],"accuracy,":[8],"and":[9,37,80,98,183,193,239,250],"scope":[10],"recognition,":[11],"yet":[12,117],"LLM":[13],"trajectory":[14],"assessments":[15,26],"remain":[16],"inherently":[17],"post-hoc.":[18],"Disconnected":[19],"from":[20,74],"the":[21,41,54,72,94,108,139,150,207,241,255],"active":[22],"execution":[23,55,96,238],"loop,":[24],"such":[25],"identify":[27],"errors":[28,113,144],"that":[29,145,155,201],"usually":[31],"addressed":[32],"through":[33,247],"prompt-tuning":[34],"or":[35,170],"retraining,":[36],"fundamentally":[38],"cannot":[39],"course-correct":[40],"agent":[42,63,97,143],"in":[43],"real":[44],"time.":[45],"To":[46,129],"close":[47],"this":[48,85,127,131],"gap,":[49],"we":[50,133],"move":[51],"evaluation":[52,79],"into":[53],"loop":[56],"at":[57],"inference":[58],"time:":[59],"a":[60,88,99,167,212,233],"specialized":[61],"reviewer":[62,109,162,202,242],"evaluates":[64],"provisional":[65],"calls":[67],"prior":[68,119],"to":[69,77,121],"execution,":[70],"shifting":[71],"paradigm":[73],"post-hoc":[75],"recovery":[76],"proactive":[78],"error":[81],"mitigation.":[82],"In":[83],"practice,":[84],"architecture":[86],"establishes":[87],"clear":[89],"separation":[90],"of":[91,141,152,236],"concerns":[92],"between":[93],"primary":[95],"secondary":[100],"review":[101],"agent.":[102,257],"As":[103],"with":[104],"any":[105],"multi-agent":[106],"system,":[107],"can":[110,243],"introduce":[111,134],"new":[112],"while":[114],"correcting":[115],"others,":[116],"no":[118],"work":[120],"our":[122,178],"knowledge":[123],"has":[124],"systematically":[125,245],"measured":[126],"tradeoff.":[128],"quantify":[130],"tradeoff,":[132],"Helpfulness-Harmfulness":[135],"metrics:":[136],"helpfulness":[137],"measures":[138,149],"percentage":[140,151],"base":[142,256],"feedback":[146,156],"corrects;":[147],"harmfulness":[148],"correct":[153],"responses":[154],"degrades.":[157],"These":[158],"metrics":[159,199],"directly":[160],"inform":[161],"design":[163],"by":[164],"revealing":[165],"whether":[166],"given":[168],"model":[169,203,209,248],"prompt":[171,221,251],"provides":[172,225],"net":[173],"positive":[174],"value.":[175],"We":[176],"evaluate":[177],"approach":[179],"BFCL":[181],"(single-turn)":[182],"Tau2-Bench":[184],"(multi-turn":[185],"stateful":[186],"scenarios),":[187],"achieving":[188],"+5.5%":[189],"irrelevance":[191],"detection":[192],"+7.1%":[194],"multi-turn":[196],"tasks.":[197],"Our":[198],"reveal":[200],"choice":[204],"is":[205],"critical:":[206],"reasoning":[208],"o3-mini":[210],"achieves":[211],"3:1":[213],"benefit-to-risk":[214],"ratio":[215],"versus":[216],"2.1:1":[217],"for":[218],"GPT-4o.":[219],"Automated":[220],"optimization":[222],"via":[223],"GEPA":[224],"an":[226],"additional":[227],"+1.5-2.8%.":[228],"Together,":[229],"these":[230],"results":[231],"demonstrate":[232],"core":[234],"advantage":[235],"separating":[237],"review:":[240],"be":[244],"improved":[246],"selection":[249],"optimization,":[252],"without":[253],"retraining":[254]},"counts_by_year":[],"updated_date":"2026-05-02T06:10:54.344120","created_date":"2026-05-02T00:00:00"}
