{"id":"https://openalex.org/W7135052881","doi":"https://doi.org/10.48550/arxiv.2603.10359","title":"HEAL: Hindsight Entropy-Assisted Learning for Reasoning Distillation","display_name":"HEAL: Hindsight Entropy-Assisted Learning for Reasoning Distillation","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135052881","doi":"https://doi.org/10.48550/arxiv.2603.10359"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10359","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10359","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10359","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128848526","display_name":"Wenjing Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Wenjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125698893","display_name":"Jiangze Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Jiangze","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128845792","display_name":"Jieyun Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jieyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101299950","display_name":"Yi Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128879986","display_name":"Shuming Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Shuming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128891226","display_name":"Ping Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128833705","display_name":"Ning Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128879761","display_name":"Zhaoxiang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zhaoxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128916503","display_name":"Kai Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128869357","display_name":"Shiguo Lian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Shiguo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5128848526"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.6843000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.6843000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10656","display_name":"Child and Animal Learning Development","score":0.03319999948143959,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10636","display_name":"Innovative Teaching and Learning Methods","score":0.016499999910593033,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.9275000095367432},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.5527999997138977},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.49239999055862427},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.4797999858856201},{"id":"https://openalex.org/keywords/causal-reasoning","display_name":"Causal reasoning","score":0.47540000081062317},{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.41749998927116394},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4074999988079071},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.4047999978065491},{"id":"https://openalex.org/keywords/counterexample","display_name":"Counterexample","score":0.3921999931335449},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.3889999985694885}],"concepts":[{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.9275000095367432},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.654699981212616},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.5527999997138977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5509999990463257},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.49239999055862427},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.4797999858856201},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.47540000081062317},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4408000111579895},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.41749998927116394},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4074999988079071},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C162838799","wikidata":"https://www.wikidata.org/wiki/Q596077","display_name":"Counterexample","level":2,"score":0.3921999931335449},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.3889999985694885},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.3197999894618988},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C133112747","wikidata":"https://www.wikidata.org/wiki/Q7251931","display_name":"Protocol analysis","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.29899999499320984},{"id":"https://openalex.org/C95611797","wikidata":"https://www.wikidata.org/wiki/Q17502105","display_name":"Infimum and supremum","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C2779525943","wikidata":"https://www.wikidata.org/wiki/Q1187300","display_name":"Grammaticality","level":3,"score":0.28349998593330383},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C122203268","wikidata":"https://www.wikidata.org/wiki/Q5862903","display_name":"Probability theory","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10359","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10359","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10359","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10359","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7792081832885742}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Distilling":[0],"reasoning":[1,67,97],"capabilities":[2],"from":[3,125,142],"Large":[4],"Reasoning":[5],"Models":[6],"(LRMs)":[7],"into":[8],"smaller":[9],"models":[10],"is":[11],"typically":[12],"constrained":[13],"by":[14],"the":[15,23,34,49,71,75],"limitation":[16],"of":[17,74,77],"rejection":[18],"sampling.":[19],"Standard":[20],"methods":[21],"treat":[22],"teacher":[24,35],"as":[25],"a":[26,116,135],"static":[27],"filter,":[28],"discarding":[29],"complex":[30],"\"corner-case\"":[31],"problems":[32],"where":[33],"fails":[36],"to":[37,64,107,145],"explore":[38],"valid":[39],"solutions":[40],"independently,":[41],"thereby":[42],"creating":[43],"an":[44,60,90],"artificial":[45],"\"Teacher":[46],"Ceiling\"":[47],"for":[48],"student.":[50],"In":[51],"this":[52,66],"work,":[53],"we":[54],"propose":[55],"Hindsight":[56],"Entropy-Assisted":[57,87],"Learning":[58],"(HEAL),":[59],"RL-free":[61],"framework":[62],"designed":[63],"bridge":[65],"gap.":[68],"Drawing":[69],"on":[70,150],"educational":[72],"theory":[73],"Zone":[76],"Proximal":[78],"Development(ZPD),":[79],"HEAL":[80,155],"synergizes":[81],"three":[82],"core":[83],"modules:":[84],"(1)":[85],"Guided":[86],"Repair":[88],"(GEAR),":[89],"active":[91],"intervention":[92],"mechanism":[93],"that":[94,120,139,154],"detects":[95],"critical":[96],"breakpoints":[98],"via":[99],"entropy":[100],"dynamics":[101],"and":[102,128,161],"injects":[103],"targeted":[104],"hindsight":[105],"hints":[106],"repair":[108],"broken":[109],"trajectories;":[110],"(2)":[111],"Perplexity-Uncertainty":[112],"Ratio":[113],"Estimator":[114],"(PURE),":[115],"rigorous":[117],"filtering":[118],"protocol":[119],"decouples":[121],"genuine":[122],"cognitive":[123],"breakthroughs":[124],"spurious":[126],"shortcuts;":[127],"(3)":[129],"Progressive":[130],"Answer-guided":[131],"Curriculum":[132],"Evolution":[133],"(PACE),":[134],"three-stage":[136],"distillation":[137,160],"strategy":[138],"organizes":[140],"training":[141],"foundational":[143],"alignment":[144],"frontier":[146],"breakthrough.":[147],"Extensive":[148],"experiments":[149],"multiple":[151],"benchmarks":[152],"demonstrate":[153],"significantly":[156],"outperforms":[157],"traditional":[158],"SFT":[159],"other":[162],"baselines.":[163]},"counts_by_year":[],"updated_date":"2026-03-13T14:25:03.468858","created_date":"2026-03-13T00:00:00"}
