{"id":"https://openalex.org/W7135216010","doi":"https://doi.org/10.48550/arxiv.2603.12109","title":"On Information Self-Locking in Reinforcement Learning for Active Reasoning of LLM agents","display_name":"On Information Self-Locking in Reinforcement Learning for Active Reasoning of LLM agents","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135216010","doi":"https://doi.org/10.48550/arxiv.2603.12109"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12109","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111160404","display_name":"Deyu Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Deyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129027896","display_name":"Yongqiang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yongqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129095998","display_name":"Fan Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128954696","display_name":"Mufei Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mufei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128985403","display_name":"Pan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Pan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129085017","display_name":"Yu Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129058299","display_name":"James Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5479999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5479999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08219999819993973,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.03280000016093254,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7878999710083008},{"id":"https://openalex.org/keywords/ask-price","display_name":"Ask price","score":0.6888999938964844},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5044999718666077},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4708999991416931},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.4381999969482422},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.4343000054359436},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4032000005245209},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.39640000462532043}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7878999710083008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6913999915122986},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.6888999938964844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5946999788284302},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5044999718666077},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4708999991416931},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.4381999969482422},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.4343000054359436},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.3840000033378601},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.37540000677108765},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33340001106262207},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32109999656677246},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C2776904630","wikidata":"https://www.wikidata.org/wiki/Q356336","display_name":"Adept","level":3,"score":0.26010000705718994}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,132],"(RL)":[2],"has":[3],"become":[4],"a":[5,48,105,148],"de":[6],"facto":[7],"paradigm":[8],"for":[9,134],"building":[10],"LLM-based":[11],"agents":[12,27,58],"that":[13,43,108,155,175],"act,":[14],"interact,":[15],"and":[16,65,89,101,137],"reason":[17],"over":[18],"extended":[19],"task":[20,98],"horizons.":[21],"However,":[22],"in":[23,185],"active":[24],"reasoning":[25],"where":[26],"must":[28],"elicit":[29,62],"new":[30],"observations":[31],"through":[32],"interaction":[33],"with":[34],"the":[35,39,72,95,115,131],"environment":[36],"to":[37,61,66,110,139,160,182],"solve":[38],"task,":[40],"we":[41,53,74,145],"find":[42],"outcome-based":[44],"RL":[45],"can":[46],"induce":[47],"systematic":[49],"failure":[50],"mode":[51],"which":[52,85,93],"call":[54],"information":[55],"self-locking":[56],"(SeL):":[57],"fail":[59],"both":[60,135],"informative":[63,118],"feedback":[64],"internalize":[67],"obtained":[68],"evidence.":[69,127],"To":[70,141],"understand":[71],"issue,":[73,144],"trace":[75],"agentic":[76,169],"behaviors":[77],"into":[78],"two":[79],"coupled":[80],"capabilities:":[81],"Action":[82],"Selection":[83],"(AS),":[84],"determines":[86],"observation":[87],"streams,":[88],"Belief":[90],"Tracking":[91],"(BT),":[92],"updates":[94],"agent's":[96],"internal":[97],"understanding.":[99],"Theoretical":[100],"empirical":[102],"analyses":[103],"reveal":[104],"bidirectional":[106],"bottleneck":[107],"leads":[109,138],"SeL:":[111],"weak":[112,121],"BT":[113,124],"obscures":[114],"credit":[116,162],"of":[117,125,171],"actions,":[119],"while":[120],"AS":[122],"deprives":[123],"useful":[126],"This":[128],"coupling":[129],"weakens":[130],"signal":[133],"capabilities":[136],"SeL.":[140],"mitigate":[142],"this":[143],"propose":[146],"AREW,":[147],"simple":[149],"yet":[150],"effective":[151],"Advantage":[152],"Reweighting":[153],"method":[154],"uses":[156],"easy-to-obtain":[157],"directional":[158],"critiques":[159],"reallocate":[161],"within":[163],"trajectories.":[164],"Extensive":[165],"experiments":[166],"across":[167],"9":[168],"tasks":[170],"varying":[172],"complexity":[173],"show":[174],"AREW":[176],"significantly":[177],"mitigates":[178],"SeL,":[179],"yielding":[180],"up":[181],"60-point":[183],"gains":[184],"final":[186],"performance.":[187],"Code":[188],"is":[189],"available":[190],"at":[191],"https://github.com/unimpor/T3.":[192]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
