{"id":"https://openalex.org/W7161816775","doi":"https://doi.org/10.48550/arxiv.2605.19358","title":"Taming the Thinker: Conditional Entropy Shaping for Adaptive LLM Reasoning","display_name":"Taming the Thinker: Conditional Entropy Shaping for Adaptive LLM Reasoning","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161816775","doi":"https://doi.org/10.48550/arxiv.2605.19358"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.19358","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19358","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.19358","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114230309","display_name":"Shuyu Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Shuyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136592684","display_name":"Jian Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136541550","display_name":"Delai Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Delai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136589727","display_name":"Yining Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yining","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136581539","display_name":"Shengping Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shengping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136528518","display_name":"Jiaen Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Jiaen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136524110","display_name":"Ying Fu","orcid":"https://orcid.org/0000-0002-8861-1927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Ying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136556610","display_name":"Wei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136567875","display_name":"Jitao Sang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Jitao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5160999894142151,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5160999894142151,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1080000028014183,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.07249999791383743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.586899995803833},{"id":"https://openalex.org/keywords/conditional-entropy","display_name":"Conditional entropy","score":0.5475000143051147},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.445499986410141},{"id":"https://openalex.org/keywords/kullback\u2013leibler-divergence","display_name":"Kullback\u2013Leibler divergence","score":0.3230000138282776},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.27559998631477356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6431000232696533},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.586899995803833},{"id":"https://openalex.org/C101721835","wikidata":"https://www.wikidata.org/wiki/Q813908","display_name":"Conditional entropy","level":3,"score":0.5475000143051147},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4837000072002411},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.445499986410141},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3944000005722046},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33869999647140503},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C44492722","wikidata":"https://www.wikidata.org/wiki/Q327069","display_name":"Conditional probability","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.19358","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19358","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.19358","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19358","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6645368337631226}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Entropy-based":[0],"deep":[1],"reasoning":[2,12,96],"has":[3],"emerged":[4],"as":[5,78],"a":[6,47,84,144],"promising":[7],"direction":[8],"for":[9],"improving":[10],"the":[11,32],"capabilities":[13],"of":[14,34],"Large":[15],"Language":[16],"Models":[17],"(LLMs),":[18],"but":[19],"existing":[20],"methods":[21],"often":[22],"either":[23],"increase":[24],"response":[25,53,132],"length":[26,133],"indiscriminately":[27],"or":[28],"shorten":[29],"responses":[30],"at":[31],"cost":[33],"accuracy.":[35],"To":[36],"better":[37],"balance":[38],"this":[39],"trade-off,":[40],"we":[41],"introduce":[42],"Conditional":[43],"Entropy":[44],"Shaping":[45],"(CES),":[46],"framework":[48],"that":[49],"dynamically":[50],"controls":[51],"token-level":[52,76],"entropy,":[54],"enabling":[55],"LLMs":[56],"to":[57,98,107,135],"produce":[58],"concise":[59],"solutions":[60],"on":[61,68,72,94,104,116,121,143,149],"simple":[62],"problems":[63],"while":[64,130],"encouraging":[65],"deeper":[66],"exploration":[67,109],"hard":[69],"ones.":[70],"Built":[71],"DAPO,":[73,136],"CES":[74,115,125],"uses":[75],"entropy":[77],"an":[79],"uncertainty":[80],"signal":[81],"and":[82,101,110,118,137,148],"applies":[83],"conditional":[85],"bidirectional":[86],"policy:":[87],"it":[88,120],"penalizes":[89],"high-entropy":[90],"\"forking":[91],"point\"":[92],"tokens":[93],"correct":[95],"paths":[97,106],"improve":[99],"conciseness,":[100],"rewards":[102],"them":[103],"incorrect":[105],"encourage":[108],"error":[111],"correction.":[112],"We":[113],"implement":[114],"DeepSeek-R1-Distill-7B":[117],"evaluate":[119],"12":[122],"mathematical":[123],"benchmarks.":[124,151],"consistently":[126],"improves":[127],"average":[128],"accuracy":[129],"reducing":[131],"relative":[134],"supplementary":[138],"experiments":[139],"show":[140],"similar":[141],"trends":[142],"smaller":[145],"1.5B":[146],"backbone":[147],"out-of-domain":[150]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
