{"id":"https://openalex.org/W7160906306","doi":"https://doi.org/10.48550/arxiv.2605.09252","title":"LLM Agents Already Know When to Call Tools -- Even Without Reasoning","display_name":"LLM Agents Already Know When to Call Tools -- Even Without Reasoning","publication_year":2026,"publication_date":"2026-05-10","ids":{"openalex":"https://openalex.org/W7160906306","doi":"https://doi.org/10.48550/arxiv.2605.09252"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.09252","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09252","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.09252","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067062014","display_name":"Chung-En Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Chung-En","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133361789","display_name":"Linbo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Linbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135954402","display_name":"Ge 
Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Ge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125664590","display_name":"Zimo Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zimo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135951729","display_name":"Tsui-Wei Weng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weng, Tsui-Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.33410000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.33410000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service 
Interactions","score":0.09600000083446503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.05849999934434891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8052999973297119},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5810999870300293},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5584999918937683},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5012000203132629},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4690000116825104},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3547999858856201}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8052999973297119},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7698000073432922},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set 
theory)","level":3,"score":0.5810999870300293},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5584999918937683},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5012000203132629},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5005999803543091},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4690000116825104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40950000286102295},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28519999980926514},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model 
checking","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.09252","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09252","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.09252","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09252","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.5096853971481323,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"},{"score":0.4041902422904968,"display_name":"Peace, Justice and strong 
institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tool-augmented":[0],"LLM":[1],"agents":[2],"tend":[3],"to":[4,90,99,177,197],"call":[5,17,32,244],"tools":[6,172],"indiscriminately,":[7],"even":[8],"when":[9,29,171],"the":[10,88,97,136,149,160,199,204,227],"model":[11,98],"can":[12],"answer":[13],"directly.":[14],"Each":[15],"unnecessary":[16,92,115],"wastes":[18],"API":[19],"fees":[20],"and":[21,59,76,94,117,140,202],"latency,":[22],"yet":[23],"no":[24],"existing":[25],"benchmark":[26,40],"systematically":[27],"studies":[28],"a":[30,39,70,121,193,208,241,248],"tool":[31,52,102,143,217,237,243],"is":[33,145,255],"actually":[34],"needed.":[35],"We":[36,79],"propose":[37,189],"When2Tool,":[38],"of":[41,51,83,236],"18":[42],"environments":[43],"(15":[44],"single-hop,":[45],"3":[46],"multi-hop)":[47],"spanning":[48],"three":[49],"categories":[50],"necessity":[53,103,144],"--":[54,62],"computational":[55],"scale,":[56],"knowledge":[57,181],"boundaries,":[58],"execution":[60],"reliability":[61],"each":[63],"with":[64,152,207,221],"controlled":[65],"difficulty":[66],"levels":[67],"that":[68,142,167],"create":[69],"clear":[71],"decision":[72],"boundary":[73],"between":[74],"tool-necessary":[75],"tool-unnecessary":[77],"tasks.":[78,127],"evaluate":[80],"two":[81],"families":[82],"training-free":[84],"baselines:":[85],"Prompt-only":[86,110],"(varying":[87],"prompt":[89],"discourage":[91],"calls)":[93],"Reason-then-Act":[95,118],"(requiring":[96],"reason":[100],"about":[101],"before":[104],"acting).":[105],"Both":[106],"provide":[107],"limited":[108],"control:":[109],"suppresses":[111],"necessary":[112],"calls":[113,218],"alongside":[114],"ones,":[116],"still":[119],"incurs":[120,247],"disproportionate":[122],"accuracy":[123,224,232,251],"cost":[124],"on":[125,179,185],"hard":[126],"To":
[128],"understand":[129],"why":[130],"these":[131],"baselines":[132],"fail,":[133],"we":[134,188],"probe":[135,196],"models'":[137],"hidden":[138],"states":[139],"find":[141],"linearly":[146],"decodable":[147],"from":[148],"pre-generation":[150],"representation":[151],"AUROC":[153],"0.89--0.96":[154],"across":[155],"six":[156],"models,":[157],"substantially":[158],"exceeding":[159],"model's":[161,205],"own":[162],"verbalized":[163],"reasoning.":[164],"This":[165],"reveals":[166],"models":[168,213],"already":[169],"know":[170],"are":[173],"needed,":[174],"but":[175,246],"fail":[176],"act":[178],"this":[180,186],"during":[182],"generation.":[183],"Building":[184],"finding,":[187],"Probe&Prefill,":[190],"which":[191],"uses":[192],"lightweight":[194],"linear":[195],"read":[198],"hidden-state":[200],"signal":[201],"prefills":[203],"response":[206],"steering":[209],"sentence.":[210],"Across":[211],"all":[212],"tested,":[214],"Probe&Prefill":[215],"reduces":[216,234],"by":[219],"48%":[220],"only":[222,233],"1.7%":[223],"loss,":[225],"while":[226],"best":[228],"baseline":[229],"at":[230,257],"comparable":[231],"6%":[235],"calls,":[238],"or":[239],"achieves":[240],"similar":[242],"reduction":[245],"5$\\times$":[249],"higher":[250],"loss.":[252],"Our":[253],"code":[254],"available":[256],"https://github.com/Trustworthy-ML-Lab/when2tool":[258]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-13T00:00:00"}
