{"id":"https://openalex.org/W7128483324","doi":"https://doi.org/10.48550/arxiv.2602.08964","title":"A Behavioural and Representational Evaluation of Goal-Directedness in Language Model Agents","display_name":"A Behavioural and Representational Evaluation of Goal-Directedness in Language Model Agents","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128483324","doi":"https://doi.org/10.48550/arxiv.2602.08964"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.08964","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014177305","display_name":"Raghu Arghal","orcid":"https://orcid.org/0000-0002-1123-4975"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arghal, Raghu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010723678","display_name":"Fade Chen","orcid":"https://orcid.org/0009-0009-4246-179X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Fade","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125528545","display_name":"Niall Dalton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dalton, Niall","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125484377","display_name":"Evgenii Kortukov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kortukov, Evgenii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069529453","display_name":"Calum McNamara","orcid":"https://orcid.org/0000-0003-1032-073X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McNamara, Calum","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124025748","display_name":"Angelos Nalmpantis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nalmpantis, Angelos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125519669","display_name":"Moksh Nirvaan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nirvaan, Moksh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042129410","display_name":"Gabriele Sarti","orcid":"https://orcid.org/0000-0001-8715-2987"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sarti, Gabriele","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125533305","display_name":"Mario Giulianelli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giulianelli, Mario","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2102999985218048,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2102999985218048,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.10809999704360962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.08910000324249268,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6704999804496765},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6284999847412109},{"id":"https://openalex.org/keywords/introspection","display_name":"Introspection","score":0.5532000064849854},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.5034000277519226},{"id":"https://openalex.org/keywords/internal-model","display_name":"Internal model","score":0.47589999437332153},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.4180999994277954},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.4027999937534332},{"id":"https://openalex.org/keywords/goal-orientation","display_name":"Goal orientation","score":0.39489999413490295}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6704999804496765},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6284999847412109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.600600004196167},{"id":"https://openalex.org/C129671850","wikidata":"https://www.wikidata.org/wiki/Q210501","display_name":"Introspection","level":2,"score":0.5532000064849854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5252000093460083},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.5034000277519226},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.47589999437332153},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.44780001044273376},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.4027999937534332},{"id":"https://openalex.org/C84653758","wikidata":"https://www.wikidata.org/wiki/Q5575175","display_name":"Goal orientation","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.373199999332428},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36489999294281006},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.35910001397132874},{"id":"https://openalex.org/C78780964","wikidata":"https://www.wikidata.org/wiki/Q7233193","display_name":"Position paper","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.31450000405311584},{"id":"https://openalex.org/C2776496909","wikidata":"https://www.wikidata.org/wiki/Q7313984","display_name":"Repertory grid","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.08964","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.08964","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.08964","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"an":[1,47],"agent's":[2],"goals":[3,19],"helps":[4],"explain":[5],"and":[6,73,89,104,127,141,172],"predict":[7],"its":[8,125,132],"behaviour,":[9],"yet":[10],"there":[11],"is":[12,162],"no":[13],"established":[14],"methodology":[15],"for":[16,27],"reliably":[17],"attributing":[18],"to":[20,86,97,167],"agentic":[21],"systems.":[22],"We":[23,92,108],"propose":[24],"a":[25,42,51,56,116],"framework":[26],"evaluating":[28],"goal-directedness":[29],"that":[30,77,110,131,142,159],"integrates":[31],"behavioural":[32,165],"evaluation":[33],"with":[34,80,137],"interpretability-based":[35],"analyses":[36],"of":[37,101],"models'":[38],"internal":[39,99,139],"representations.":[40],"As":[41],"case":[43],"study,":[44],"we":[45,60],"examine":[46],"LLM":[48,112],"agent":[49,63,113],"navigating":[50],"2D":[52],"grid":[53,69],"world":[54],"towards":[55,150],"goal":[57,74,129],"state.":[58],"Behaviourally,":[59],"evaluate":[61],"the":[62,102,111,128,157],"against":[64],"optimal":[65],"policies":[66],"across":[67],"varying":[68],"sizes,":[70],"obstacle":[71],"densities,":[72],"structures,":[75],"finding":[76],"performance":[78],"scales":[79],"task":[81],"difficulty":[82],"while":[83],"remaining":[84],"robust":[85],"difficulty-preserving":[87],"transformations":[88],"multi-goal":[90],"structures.":[91],"then":[93],"use":[94],"probing":[95],"methods":[96],"decode":[98],"representations":[100],"environment":[103],"multi-step":[105],"action":[106,152],"plans.":[107],"find":[109],"non-linearly":[114],"encodes":[115],"coarse":[117],"spatial":[118,148],"map,":[119],"preserving":[120],"approximate":[121],"task-relevant":[122],"cues":[123,149],"about":[124],"position":[126],"location;":[130],"actions":[133],"are":[134],"broadly":[135],"consistent":[136],"these":[138],"representations;":[140],"reasoning":[143],"reorganises":[144],"them,":[145],"shifting":[146],"from":[147],"immediate":[151],"selection.":[153],"Our":[154],"findings":[155],"support":[156],"view":[158],"introspective":[160],"examination":[161],"required":[163],"beyond":[164],"evaluations":[166],"characterise":[168],"how":[169],"agents":[170],"represent":[171],"pursue":[173],"their":[174],"objectives.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-11T00:00:00"}
