{"id":"https://openalex.org/W7128521985","doi":"https://doi.org/10.48550/arxiv.2602.08354","title":"Does Your Reasoning Model Implicitly Know When to Stop Thinking?","display_name":"Does Your Reasoning Model Implicitly Know When to Stop Thinking?","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128521985","doi":"https://doi.org/10.48550/arxiv.2602.08354"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.08354","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125585650","display_name":"Zixuan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Zixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125586307","display_name":"Xin Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124829485","display_name":"Yuxi Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Yuxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124755299","display_name":"Jianbin Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Jianbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104232205","display_name":"Xuanda Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xuanda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079105159","display_name":"Zheng Zhang","orcid":"https://orcid.org/0000-0002-4557-2622"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhixia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124832397","display_name":"Hongyan Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Hongyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108854246","display_name":"Songshi Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Songshi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121620510","display_name":"Zehao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zehao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124799625","display_name":"Xuefeng Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Xuefeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125557823","display_name":"Fuzhen Zhuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Fuzhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125536052","display_name":"Jianxin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jianxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125501357","display_name":"Yikun Ban","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ban, Yikun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125487804","display_name":"Deqing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Deqing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5125585650"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.2386000007390976,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.2386000007390976,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.20190000534057617,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10719999670982361,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.835099995136261},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5702999830245972},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5109000205993652},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.36230000853538513},{"id":"https://openalex.org/keywords/uncorrelated","display_name":"Uncorrelated","score":0.32850000262260437}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.835099995136261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7390000224113464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5950999855995178},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5702999830245972},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4027000069618225},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C169345407","wikidata":"https://www.wikidata.org/wiki/Q8216221","display_name":"Uncorrelated","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.30730000138282776},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2955999970436096},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2574000060558319}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0,40],"advancements":[1],"in":[2,27,37],"large":[3],"reasoning":[4,14,45,110,130,140],"models":[5],"(LRMs)":[6],"have":[7],"greatly":[8],"improved":[9],"their":[10],"capabilities":[11],"on":[12],"complex":[13],"tasks":[15],"through":[16],"Long":[17],"Chains":[18],"of":[19,64,144],"Thought":[20],"(CoTs).":[21],"However,":[22],"this":[23,65,84,108],"approach":[24],"often":[25],"results":[26],"substantial":[28],"redundancy,":[29],"impairing":[30],"computational":[31],"efficiency":[32,143],"and":[33,52,70,142],"causing":[34],"significant":[35],"delays":[36],"real-time":[38],"applications.":[39],"studies":[41],"show":[42],"that":[43,73,106],"longer":[44],"chains":[46],"are":[47],"frequently":[48],"uncorrelated":[49],"with":[50],"correctness":[51],"can":[53],"even":[54],"be":[55],"detrimental":[56],"to":[57,80,125],"accuracy.":[58],"In":[59],"a":[60,102],"further":[61],"in-depth":[62],"analysis":[63],"phenomenon,":[66],"we":[67,95],"surprisingly":[68],"uncover":[69],"empirically":[71],"verify":[72],"LRMs":[74,145],"implicitly":[75],"know":[76],"the":[77,139],"appropriate":[78],"time":[79],"stop":[81],"thinking,":[82],"while":[83],"capability":[85],"is":[86],"obscured":[87],"by":[88,93],"current":[89],"sampling":[90,104,117],"paradigms.":[91],"Motivated":[92],"this,":[94],"introduce":[96],"SAGE":[97,114],"(Self-Aware":[98],"Guided":[99],"Efficient":[100],"Reasoning),":[101],"novel":[103],"paradigm":[105],"unleashes":[107],"efficient":[109,129],"potential.":[111],"Furthermore,":[112],"integrating":[113],"as":[115],"mixed":[116],"into":[118,132],"group-based":[119],"reinforcement":[120],"learning":[121],"(SAGE-RL)":[122],"enables":[123],"SAGE-RL":[124],"effectively":[126],"incorporate":[127],"SAGE-discovered":[128],"patterns":[131],"standard":[133],"pass@1":[134],"inference,":[135],"markedly":[136],"enhancing":[137],"both":[138],"accuracy":[141],"across":[146],"multiple":[147],"challenging":[148],"mathematical":[149],"benchmarks.":[150]},"counts_by_year":[],"updated_date":"2026-02-11T14:45:31.134243","created_date":"2026-02-11T00:00:00"}
