{"id":"https://openalex.org/W7138898902","doi":"https://doi.org/10.48550/arxiv.2603.16843","title":"Internalizing Agency from Reflective Experience","display_name":"Internalizing Agency from Reflective Experience","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7138898902","doi":"https://doi.org/10.48550/arxiv.2603.16843"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16843","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16843","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16843","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069534231","display_name":"Rui Ge","orcid":"https://orcid.org/0000-0003-1049-8132"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ge, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129782633","display_name":"Yichao Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130060066","display_name":"Yuyang Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Yuyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122972602","display_name":"Junda Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Junda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130168066","display_name":"Yiming Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129918313","display_name":"Peng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129799149","display_name":"Hao Helen Zhang (15105980)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5069534231"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3434000015258789,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3434000015258789,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.164000004529953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.06880000233650208,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/agency","display_name":"Agency (philosophy)","score":0.8004000186920166},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6521000266075134},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5430999994277954},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5189999938011169},{"id":"https://openalex.org/keywords/reflection","display_name":"Reflection (computer programming)","score":0.33640000224113464}],"concepts":[{"id":"https://openalex.org/C108170787","wikidata":"https://www.wikidata.org/wiki/Q3951828","display_name":"Agency (philosophy)","level":2,"score":0.8004000186920166},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6521000266075134},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.647599995136261},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5430999994277954},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.4011000096797943},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3395000100135803},{"id":"https://openalex.org/C65682993","wikidata":"https://www.wikidata.org/wiki/Q1056451","display_name":"Reflection (computer programming)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.3319999873638153},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3154999911785126},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.30399999022483826},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.29980000853538513},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.29350000619888306},{"id":"https://openalex.org/C39549134","wikidata":"https://www.wikidata.org/wiki/Q133080","display_name":"Public relations","level":1,"score":0.2777000069618225},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16843","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16843","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16843","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16843","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"are":[3],"increasingly":[4],"deployed":[5],"as":[6,182],"autonomous":[7],"agents":[8],"that":[9,22,96],"must":[10],"plan,":[11],"act,":[12],"and":[13,119,155,170,178],"recover":[14,142],"from":[15,91,100],"mistakes":[16],"through":[17,135],"long-horizon":[18,80],"interaction":[19,160],"with":[20,33,123,185],"environments":[21],"provide":[23],"rich":[24,42],"feedback.":[25],"However,":[26],"prevailing":[27],"outcome-driven":[28,175],"post-training":[29],"methods":[30,180],"(e.g.,":[31,77],"RL":[32],"verifiable":[34],"rewards)":[35],"primarily":[36],"optimize":[37],"final":[38],"success":[39],"signals,":[40],"leaving":[41],"environment":[43,109],"feedback":[44,110],"underutilized.":[45],"Consequently,":[46],"they":[47],"often":[48],"lead":[49],"to":[50,67,73,115,141,189],"distribution":[51],"sharpening:":[52],"the":[53,69,106,133,139,167],"policy":[54,140],"becomes":[55],"better":[56],"at":[57],"reproducing":[58],"a":[59,94,149],"narrow":[60],"set":[61,151],"of":[62,152,187],"already-successful":[63],"behaviors,":[64],"while":[65],"failing":[66],"improve":[68],"feedback-grounded":[70],"agency":[71,99],"needed":[72],"expand":[74],"problem-solving":[75],"capacity":[76],"Pass@k)":[78],"in":[79,145],"settings.":[81],"To":[82],"address":[83],"this,":[84],"we":[85],"propose":[86],"LEAFE":[87,162],"(Learning":[88],"Feedback-Grounded":[89],"Agency":[90],"Reflective":[92],"Experience),":[93],"framework":[95],"internalizes":[97],"recovery":[98],"reflective":[101],"experience.":[102],"Specifically,":[103],"during":[104],"exploration,":[105],"agent":[107],"summarizes":[108],"into":[111,132],"actionable":[112],"experience,":[113],"backtracks":[114],"earlier":[116],"decision":[117],"points,":[118],"explores":[120],"alternative":[121],"branches":[122],"revised":[124],"actions.":[125],"We":[126],"then":[127],"distill":[128],"these":[129],"experience-guided":[130],"corrections":[131],"model":[134,169],"supervised":[136],"fine-tuning,":[137],"enabling":[138],"more":[143],"effectively":[144],"future":[146],"interactions.":[147],"Across":[148],"diverse":[150],"interactive":[153],"coding":[154],"agentic":[156],"tasks":[157],"under":[158],"fixed":[159],"budgets,":[161],"consistently":[163],"improves":[164],"Pass@1":[165],"over":[166],"base":[168],"achieves":[171],"higher":[172],"Pass@k":[173],"than":[174],"baselines":[176],"(GRPO)":[177],"experience-based":[179],"such":[181],"Early":[183],"Experience,":[184],"gains":[186],"up":[188],"14%":[190],"on":[191],"Pass@128.":[192]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
