{"id":"https://openalex.org/W7160936106","doi":"https://doi.org/10.48550/arxiv.2605.09502","title":"Hidden Error Awareness in Chain-of-Thought Reasoning: The Signal Is Diagnostic, Not Causal","display_name":"Hidden Error Awareness in Chain-of-Thought Reasoning: The Signal Is Diagnostic, Not Causal","publication_year":2026,"publication_date":"2026-05-10","ids":{"openalex":"https://openalex.org/W7160936106","doi":"https://doi.org/10.48550/arxiv.2605.09502"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.09502","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09502","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.09502","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068554798","display_name":"Aojie Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Aojie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135911078","display_name":"Zhiyuan Julian Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Zhiyuan Julian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135949308","display_name":"Haiyue Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haiyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135982201","display_name":"Yi Nian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nian, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135963418","display_name":"Yue Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.17020000517368317,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.17020000517368317,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10656","display_name":"Child and Animal Learning Development","score":0.11860000342130661,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11431","display_name":"Action Observation and Synchronization","score":0.0860000029206276,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7400000095367432},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.586899995803833},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4964999854564667},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4569000005722046},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4472000002861023},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.41589999198913574},{"id":"https://openalex.org/keywords/mean-squared-prediction-error","display_name":"Mean squared prediction error","score":0.3720000088214874},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3386000096797943}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7400000095367432},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6474000215530396},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.586899995803833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5623000264167786},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4964999854564667},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4569000005722046},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.41589999198913574},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37630000710487366},{"id":"https://openalex.org/C167085575","wikidata":"https://www.wikidata.org/wiki/Q6803654","display_name":"Mean squared prediction error","level":2,"score":0.3720000088214874},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C107524782","wikidata":"https://www.wikidata.org/wiki/Q40164","display_name":"Lever","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C2777179996","wikidata":"https://www.wikidata.org/wiki/Q911222","display_name":"Mistake","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.29330000281333923},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28360000252723694},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28279998898506165},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.2651999890804291},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.09502","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09502","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.09502","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09502","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.44961172342300415,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Chain-of-thought":[0],"(CoT)":[1],"prompting":[2],"assumes":[3],"that":[4,181],"generated":[5,88],"reasoning":[6,28,53,106,172],"reflects":[7],"a":[8,19,82,151,157,164],"model's":[9],"internal":[10],"computation.":[11],"We":[12],"show":[13],"this":[14,116],"assumption":[15],"is":[16,63,114,147],"wrong":[17,61],"in":[18,34,86],"specific,":[20],"measurable":[21],"way:":[22],"models":[23,107],"internally":[24],"detect":[25],"their":[26],"own":[27],"errors":[29,121],"but":[30],"outwardly":[31],"express":[32],"confidence":[33,59],"them.":[35],"A":[36,71],"linear":[37],"probe":[38],"on":[39,77],"hidden":[40,91],"states":[41],"predicts":[42],"trace":[43],"correctness":[44],"with":[45],"0.95":[46],"AUROC":[47],"--":[48,56,128,137],"from":[49,176],"the":[50,78,87,120,177],"very":[51],"first":[52],"step":[54],"(0.79)":[55],"while":[57],"verbalized":[58],"for":[60,166],"traces":[62],"4.55/5,":[64],"nearly":[65],"identical":[66],"to":[67,159],"correct":[68],"ones":[69],"(4.87/5).":[70],"text-surface":[72],"classifier":[73],"achieves":[74],"only":[75],"0.59":[76],"same":[79],"data,":[80],"confirming":[81],"0.20-point":[83],"gap":[84],"invisible":[85],"text.":[89],"This":[90,162],"error":[92,169],"awareness":[93],"holds":[94],"across":[95],"three":[96],"model":[97],"families":[98],"(Qwen,":[99],"Llama,":[100],"Phi),":[101],"1.5B-72B":[102],"parameters,":[103],"and":[104,134],"RL-trained":[105],"(DeepSeek-R1,":[108],"0.852":[109],"AUROC).":[110],"The":[111,145],"natural":[112],"question":[113],"whether":[115],"signal":[117,146],"can":[118],"fix":[119],"it":[122],"detects.":[123],"It":[124],"cannot.":[125],"Four":[126],"interventions":[127],"activation":[129,135],"steering,":[130],"probe-guided":[131],"best-of-N,":[132],"self-correction,":[133],"patching":[136,140],"all":[138],"fail;":[139],"destroys":[141],"output":[142],"coherence":[143],"entirely.":[144],"diagnostic,":[148],"not":[149,156],"causal:":[150],"readout":[152],"of":[153],"computation":[154],"quality,":[155],"lever":[158],"redirect":[160],"it.":[161],"delineates":[163],"boundary":[165],"mechanistic":[167],"interpretability:":[168],"representations":[170,180],"during":[171],"are":[173],"fundamentally":[174],"different":[175],"factual":[178],"knowledge":[179],"prior":[182],"work":[183],"has":[184],"successfully":[185],"edited.":[186]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
