{"id":"https://openalex.org/W7129076211","doi":"https://doi.org/10.48550/arxiv.2602.12526","title":"Constraint-Rectified Training for Efficient Chain-of-Thought","display_name":"Constraint-Rectified Training for Efficient Chain-of-Thought","publication_year":2026,"publication_date":"2026-02-13","ids":{"openalex":"https://openalex.org/W7129076211","doi":"https://doi.org/10.48550/arxiv.2602.12526"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.12526","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126126802","display_name":"Qinhang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Qinhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126094193","display_name":"Sen Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Sen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126135333","display_name":"Ming Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126138044","display_name":"Yingbin Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yingbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035752536","display_name":"Ness B. Shroff","orcid":"https://orcid.org/0000-0002-4606-6879"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shroff, Ness B.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126126802"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5052000284194946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5052000284194946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.03020000085234642,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5515999794006348},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5241000056266785},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4848000109195709},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.46059998869895935},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4090000092983246},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3723999857902527},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.35850000381469727},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.3578999936580658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7394999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5787000060081482},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5515999794006348},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5241000056266785},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46810001134872437},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.46059998869895935},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4090000092983246},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3723999857902527},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.3578999936580658},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.3199000060558319},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.31769999861717224},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.27720001339912415},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.12526","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.12526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.12526","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Chain-of-Thought":[0],"(CoT)":[1],"has":[2],"significantly":[3],"enhanced":[4],"the":[5,131,153,168],"reasoning":[6,25,57,61,121,156,200,244],"capabilities":[7],"of":[8,138,170,227,234],"Large":[9],"Language":[10],"Models":[11],"(LLMs),":[12],"especially":[13],"when":[14,127],"combined":[15],"with":[16,145],"reinforcement":[17],"learning":[18],"(RL)":[19],"based":[20,102],"post-training":[21,100],"methods.":[22],"While":[23],"longer":[24],"traces":[26],"can":[27],"improve":[28],"answer":[29,186],"quality":[30,187],"and":[31,43,63,83,111,123,135,158,191],"unlock":[32],"abilities":[33],"such":[34],"as":[35,49],"self-correction,":[36],"they":[37],"also":[38,208],"incur":[39],"high":[40],"inference":[41],"costs":[42],"often":[44],"introduce":[45,94],"redundant":[46,139],"steps,":[47],"known":[48],"overthinking.":[50],"Recent":[51],"research":[52],"seeks":[53],"to":[54,87,215],"develop":[55],"efficient":[56,115],"strategies":[58],"that":[59,150,177,197,230],"balance":[60],"length":[62,122,165],"accuracy,":[64],"either":[65],"through":[66],"length-aware":[67],"reward":[68],"design":[69],"or":[70],"prompt-based":[71],"calibration.":[72],"However,":[73],"these":[74,91],"heuristic-based":[75],"approaches":[76],"may":[77],"suffer":[78],"from":[79],"severe":[80],"accuracy":[81,125,161],"drop":[82],"be":[84],"very":[85],"sensitive":[86],"hyperparameters.":[88],"To":[89],"address":[90],"problems,":[92],"we":[93],"CRT":[95,117,144,198],"(Constraint-Rectified":[96],"Training),":[97],"a":[98,108,146,163,189,216,225,232],"principled":[99],"framework":[101,179],"on":[103],"reference-guarded":[104],"constrained":[105],"optimization,":[106],"yielding":[107],"more":[109],"stable":[110,134],"interpretable":[112],"formulation":[113],"for":[114],"reasoning.":[116,140],"alternates":[118],"between":[119],"minimizing":[120],"rectifying":[124],"only":[126,203],"performance":[128],"falls":[129],"below":[130],"reference,":[132],"enabling":[133,240],"effective":[136],"pruning":[137],"We":[141],"further":[142],"extend":[143],"two-stage":[147],"training":[148,222],"scheme":[149],"first":[151],"discovers":[152],"shortest":[154],"reliable":[155,192],"patterns":[157],"then":[159],"refines":[160],"under":[162],"learnt":[164],"budget,":[166],"preventing":[167],"re-emergence":[169],"verbose":[171],"CoT.":[172],"Our":[173],"comprehensive":[174],"evaluation":[175,218],"shows":[176],"this":[178],"consistently":[180],"reduces":[181],"token":[182],"usage":[183],"while":[184,237],"maintaining":[185],"at":[188],"robust":[190],"level.":[193],"Further":[194],"analysis":[195],"reveals":[196],"improves":[199],"efficiency":[201],"not":[202],"by":[204,209],"shortening":[205],"responses":[206],"but":[207],"reducing":[210],"internal":[211],"language":[212],"redundancy,":[213],"leading":[214],"new":[217],"metric.":[219],"Moreover,":[220],"CRT-based":[221],"naturally":[223],"yields":[224],"sequence":[226],"intermediate":[228],"checkpoints":[229],"span":[231],"spectrum":[233],"explanation":[235],"lengths":[236],"preserving":[238],"correctness,":[239],"fine-grained":[241],"control":[242],"over":[243],"verbosity":[245],"without":[246],"retraining.":[247]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-17T00:00:00"}
