{"id":"https://openalex.org/W7134813278","doi":"https://doi.org/10.48550/arxiv.2603.08706","title":"Agentic Critical Training","display_name":"Agentic Critical Training","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134813278","doi":"https://doi.org/10.48550/arxiv.2603.08706"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08706","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128666512","display_name":"Weize Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Weize","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128662493","display_name":"Minghui Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Minghui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037072952","display_name":"Sy-Tuyen Ho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Sy-Tuyen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128638786","display_name":"Souradip Chakraborty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakraborty, Souradip","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128641867","display_name":"Xiyao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128664383","display_name":"Furong Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Furong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5128666512"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.32359999418258667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.32359999418258667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.23729999363422394,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.0478999987244606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6643000245094299},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6492000222206116},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5414000153541565},{"id":"https://openalex.org/keywords/reflection","display_name":"Reflection (computer programming)","score":0.527899980545044},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5234000086784363},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.49140000343322754}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6643000245094299},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6492000222206116},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5414000153541565},{"id":"https://openalex.org/C65682993","wikidata":"https://www.wikidata.org/wiki/Q1056451","display_name":"Reflection (computer programming)","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5234000086784363},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5088000297546387},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.49140000343322754},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.445499986410141},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44119998812675476},{"id":"https://openalex.org/C183759332","wikidata":"https://www.wikidata.org/wiki/Q343680","display_name":"Action learning","level":4,"score":0.4212999939918518},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3734000027179718},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.37139999866485596},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3244999945163727},{"id":"https://openalex.org/C34868163","wikidata":"https://www.wikidata.org/wiki/Q5141211","display_name":"Cognitive imitation","level":3,"score":0.3034000098705292},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.26010000705718994}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08706","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08706","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08706","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08706","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.5180791616439819,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0,81],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"as":[5],"autonomous":[6],"agents":[7,17,24,89],"often":[8],"begins":[9],"with":[10,135],"imitation":[11,63,148],"learning,":[12],"but":[13],"it":[14],"only":[15],"teaches":[16],"what":[18],"to":[19,42,74,90,109,157],"do":[20],"without":[21,194],"understanding":[22],"why:":[23],"never":[25],"contrast":[26],"successful":[27],"actions":[28],"against":[29],"suboptimal":[30],"alternatives":[31],"and":[32,54,150,187,218],"thus":[33],"lack":[34],"awareness":[35],"of":[36,144,175,202],"action":[37,94,114],"quality.":[38],"Recent":[39],"approaches":[40,158],"attempt":[41],"address":[43],"this":[44],"by":[45],"introducing":[46],"self-reflection":[47,118],"supervision":[48],"derived":[49],"from":[50],"contrasts":[51],"between":[52],"expert":[53],"alternative":[55],"actions.":[56],"However,":[57],"the":[58,65,92,100,107,200],"training":[59,197],"paradigm":[60,86],"fundamentally":[61],"remains":[62],"learning:":[64],"model":[66,108],"imitates":[67],"pre-constructed":[68],"reflection":[69,161],"text":[70],"rather":[71,119],"than":[72,120],"learning":[73,85,149],"reason":[75],"autonomously.":[76],"We":[77],"propose":[78],"Agentic":[79],"Critical":[80],"(ACT),":[82],"a":[83,211],"reinforcement":[84,154],"that":[87,159,208],"trains":[88],"identify":[91],"better":[93],"among":[95],"alternatives.":[96],"By":[97],"rewarding":[98],"whether":[99],"model's":[101],"judgment":[102],"is":[103,210],"correct,":[104],"ACT":[105,128,166,179,209],"drives":[106],"autonomously":[110],"develop":[111],"reasoning":[112,192],"about":[113],"quality,":[115],"producing":[116],"genuine":[117],"imitating":[121],"it.":[122],"Across":[123],"three":[124],"challenging":[125],"agent":[126,131],"benchmarks,":[127],"consistently":[129],"improves":[130,188],"performance":[132,189],"when":[133],"combined":[134],"different":[136],"post-training":[137],"methods.":[138],"It":[139],"achieves":[140],"an":[141,172],"average":[142,173],"improvement":[143,174],"5.07":[145],"points":[146,152],"over":[147,153],"4.62":[151],"learning.":[155],"Compared":[156],"inject":[160],"capability":[162],"through":[163],"knowledge":[164],"distillation,":[165],"also":[167],"demonstrates":[168],"clear":[169],"advantages,":[170],"yielding":[171],"2.42":[176],"points.":[177],"Moreover,":[178],"enables":[180],"strong":[181],"out-of-distribution":[182],"generalization":[183],"on":[184,190],"agentic":[185],"benchmarks":[186,193],"general":[191],"any":[195],"reasoning-specific":[196],"data,":[198],"highlighting":[199],"value":[201],"our":[203],"method.":[204],"These":[205],"results":[206],"suggest":[207],"promising":[212],"path":[213],"toward":[214],"developing":[215],"more":[216],"reflective":[217],"capable":[219],"LLM":[220],"agents.":[221]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
