{"id":"https://openalex.org/W7127668399","doi":"https://doi.org/10.48550/arxiv.2602.02760","title":"From Task Solving to Robust Real-World Adaptation in LLM Agents","display_name":"From Task Solving to Robust Real-World Adaptation in LLM Agents","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127668399","doi":"https://doi.org/10.48550/arxiv.2602.02760"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.02760","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114494302","display_name":"Pouya Pezeskhpour","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pezeshkpour, Pouya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5056213327","display_name":"Estevam Hruschka","orcid":"https://orcid.org/0000-0003-1499-2808"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hruschka, Estevam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5114494302"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.22830000519752502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.22830000519752502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.11420000344514847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6728000044822693},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5200999975204468},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.5117999911308289},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4812000095844269},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.46160000562667847},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.3621000051498413},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.3553999960422516},{"id":"https://openalex.org/keywords/unexpected-events","display_name":"Unexpected events","score":0.3483000099658966}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70169997215271},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6728000044822693},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5200999975204468},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.5117999911308289},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4812000095844269},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.33469998836517334},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3197999894618988},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.3068000078201294},{"id":"https://openalex.org/C198036527","wikidata":"https://www.wikidata.org/wiki/Q2074634","display_name":"Black swan theory","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C199776023","wikidata":"https://www.wikidata.org/wiki/Q202875","display_name":"Negotiation","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2782999873161316},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.02760","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.02760","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02760","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.02760","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,181],"are":[3,29,36,178],"increasingly":[4],"deployed":[5],"as":[6,170],"specialized":[7],"agents":[8,52,134,196],"that":[9],"plan,":[10],"call":[11],"tools,":[12],"and":[13,31,34,38,60,86,107,144,147,164,173,201,209,215,225,232],"take":[14],"actions":[15],"over":[16],"extended":[17],"horizons.":[18],"Yet":[19],"many":[20],"existing":[21],"evaluations":[22],"assume":[23],"a":[24,43,116,120],"\"clean":[25],"interface\"":[26],"where":[27],"dynamics":[28],"specified":[30],"stable,":[32],"tools":[33],"sensors":[35],"reliable,":[37],"success":[39],"is":[40,66,81],"captured":[41],"by":[42],"single":[44],"explicit":[45,194],"objective-often":[46],"overestimating":[47],"real-world":[48],"readiness.":[49],"In":[50],"practice,":[51],"face":[53],"underspecified":[54],"rules,":[55,137],"unreliable":[56],"signals,":[57,106],"shifting":[58],"environments,":[59,104],"implicit,":[61],"multi-stakeholder":[62],"goals.":[63],"The":[64],"challenge":[65],"therefore":[67],"not":[68],"just":[69],"solving":[70],"tasks,":[71],"but":[72,123,176],"adapting":[73],"while":[74],"solving:":[75],"deciding":[76],"what":[77,80],"to":[78,84,88,135,142],"trust,":[79],"wanted,":[82],"when":[83,87,186],"verify,":[85],"fall":[89],"back":[90],"or":[91],"escalate.":[92],"We":[93,111],"stress-test":[94],"deployment-relevant":[95],"robustness":[96],"under":[97,150,228],"four":[98],"operational":[99],"circumstances:":[100],"partial":[101,205,229],"observability,":[102,230],"dynamic":[103,108],"noisy":[105],"agent":[109],"state.":[110],"benchmark":[112],"agentic":[113],"LLMs":[114],"in":[115],"grid-based":[117],"game":[118],"with":[119],"simple":[121],"goal":[122],"long-horizon":[124],"execution.":[125],"Episodes":[126],"violate":[127],"clean-interface":[128],"assumptions":[129],"yet":[130],"remain":[131],"solvable,":[132],"forcing":[133],"infer":[136],"pay":[138],"for":[139],"information,":[140],"adapt":[141],"environmental":[143],"internal":[145],"shifts,":[146],"act":[148],"cautiously":[149],"noise.":[151],"Across":[152],"five":[153],"state-of-the-art":[154],"LLM":[155],"agents,":[156],"we":[157],"find":[158],"large":[159],"gaps":[160],"between":[161],"nominal":[162],"task-solving":[163],"deployment-like":[165],"robustness.":[166],"Performance":[167],"generally":[168],"degrades":[169],"grid":[171],"size":[172],"horizon":[174],"increase,":[175],"rankings":[177],"unstable:":[179],"weaker":[180],"can":[182],"beat":[183],"stronger":[184],"ones":[185],"strategy":[187],"matches":[188],"the":[189],"uncertainty":[190],"regime.":[191],"Despite":[192],"no":[193],"instruction,":[195],"trade":[197],"off":[198],"completion,":[199],"efficiency,":[200],"penalty":[202],"avoidance,":[203],"suggesting":[204],"objective":[206,226],"inference.":[207],"Ablations":[208],"feature":[210],"analyses":[211],"reveal":[212],"model-specific":[213],"sensitivities":[214],"failure":[216],"drivers,":[217],"motivating":[218],"work":[219],"on":[220],"verification,":[221],"safe":[222],"action":[223],"selection,":[224],"inference":[227],"noise,":[231],"non-stationarity.":[233]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-06T00:00:00"}
