{"id":"https://openalex.org/W7138860882","doi":"https://doi.org/10.1609/aaai.v40i41.40826","title":"In-Token Rationality Optimization: Towards Accurate and Concise LLM Reasoning via Self-Feedback","display_name":"In-Token Rationality Optimization: Towards Accurate and Concise LLM Reasoning via Self-Feedback","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138860882","doi":"https://doi.org/10.1609/aaai.v40i41.40826"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i41.40826","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40826","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40826/44787","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40826/44787","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128028352","display_name":"Mingye Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingye Zhu","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129822547","display_name":"Yi Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210144331","display_name":"People's Action","ror":"https://ror.org/044v30149","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210144331"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["People's Daily Online"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"People's Daily Online","institution_ids":["https://openalex.org/I4210144331"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057108350","display_name":"Zheren Fu","orcid":"https://orcid.org/0000-0001-8389-8642"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheren Fu","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341261","display_name":"Qi Wang","orcid":"https://orcid.org/0000-0002-1328-8571"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129984561","display_name":"Yongdong Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.6291548,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"41","first_page":"35195","last_page":"35203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4401000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4401000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.17000000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0471000000834465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6863999962806702},{"id":"https://openalex.org/keywords/rationality","display_name":"Rationality","score":0.6019999980926514},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5099999904632568},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4749999940395355},{"id":"https://openalex.org/keywords/realm","display_name":"Realm","score":0.3905999958515167},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.38190001249313354},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.38019999861717224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7095000147819519},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6863999962806702},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6151000261306763},{"id":"https://openalex.org/C201717286","wikidata":"https://www.wikidata.org/wiki/Q938185","display_name":"Rationality","level":2,"score":0.6019999980926514},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5099999904632568},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4749999940395355},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C2778757428","wikidata":"https://www.wikidata.org/wiki/Q1250464","display_name":"Realm","level":2,"score":0.3905999958515167},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.38019999861717224},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3603000044822693},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.28360000252723694},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.2793000042438507},{"id":"https://openalex.org/C2780589192","wikidata":"https://www.wikidata.org/wiki/Q7285140","display_name":"Raising (metalworking)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i41.40826","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40826","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40826/44787","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i41.40826","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40826","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40826/44787","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.422944575548172,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138860882.pdf","grobid_xml":"https://content.openalex.org/works/W7138860882.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"for":[5,61,97],"chain-of-thought":[6],"reasoning":[7,76,168],"presents":[8],"a":[9,15,51,115],"significant":[10],"challenge:":[11],"supervised":[12],"fine-tuning":[13],"on":[14],"single":[16,116],"\"golden\"":[17],"rationale":[18],"hurts":[19],"generalization":[20],"as":[21],"it":[22],"penalizes":[23],"equally":[24],"valid":[25,75],"alternatives,":[26],"whereas":[27],"reinforcement":[28],"learning":[29],"with":[30,34],"verifiable":[31],"rewards":[32],"struggles":[33],"credit":[35],"assignment":[36],"and":[37,59,63,93,110,122],"prohibitive":[38],"computational":[39],"cost.":[40],"To":[41],"tackle":[42],"these":[43],"limitations,":[44],"we":[45],"introduce":[46],"InTRO":[47,78,129,160],"(In-Token":[48],"Rationality":[49],"Optimization),":[50],"new":[52],"framework":[53],"that":[54,170],"enables":[55,161],"both":[56],"token-level":[57,108],"exploration":[58,109],"self-feedback":[60],"accurate":[62,121],"concise":[64,123],"reasoning.":[65],"Instead":[66],"of":[67,148,175],"directly":[68],"optimizing":[69],"an":[70],"intractable":[71],"objective":[72],"over":[73],"all":[74],"paths,":[77],"leverages":[79],"correction":[80],"factors\u2014token-wise":[81],"importance":[82],"weights":[83],"estimated":[84],"by":[85,137],"the":[86,90,104,143,173],"information":[87],"discrepancy":[88],"between":[89],"generative":[91],"policy":[92],"its":[94],"answer-conditioned":[95],"counterpart,":[96],"informative":[98],"next-token":[99],"selection.":[100],"This":[101],"approach":[102],"allows":[103],"model":[105],"to":[106,139,142,166],"perform":[107],"receive":[111],"self-generated":[112],"feedback":[113],"within":[114],"forward":[117],"pass,":[118],"ultimately":[119],"encouraging":[120],"rationales.":[124],"Across":[125],"six":[126],"math-reasoning":[127],"benchmarks,":[128],"consistently":[130],"outperforms":[131],"other":[132],"baselines,":[133],"raising":[134],"solution":[135],"accuracy":[136],"up":[138],"20%":[140],"relative":[141],"base":[144],"model.":[145],"Its":[146],"chains":[147],"thought":[149],"are":[150],"also":[151],"notably":[152],"more":[153],"concise,":[154],"exhibiting":[155],"reduced":[156],"verbosity.":[157],"Beyond":[158],"this,":[159],"cross-domain":[162],"transfer,":[163],"successfully":[164],"adapting":[165],"out-of-domain":[167],"tasks":[169],"extend":[171],"beyond":[172],"realm":[174],"mathematics,":[176],"demonstrating":[177],"robust":[178],"generalization.":[179]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-20T00:00:00"}
