{"id":"https://openalex.org/W7138197918","doi":"https://doi.org/10.1609/aaai.v40i29.39656","title":"Prototype Entropy Alignment: Reinforcing Structured Uncertainty in LLM Reasoning","display_name":"Prototype Entropy Alignment: Reinforcing Structured Uncertainty in LLM Reasoning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138197918","doi":"https://doi.org/10.1609/aaai.v40i29.39656"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i29.39656","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39656","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39656/43617","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39656/43617","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129714538","display_name":"Zhengyuan Pan","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhengyuan Pan","raw_affiliation_strings":["School of Film, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Film, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129665080","display_name":"Yanhao Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanhao Chen","raw_affiliation_strings":["School of Film, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Film, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087501457","display_name":"Zhi-Wei Jian","orcid":null},"institutions":[{"id":"https://openalex.org/I354108","display_name":"Minjiang University","ror":"https://ror.org/00s7tkw17","country_code":"CN","type":"education","lineage":["https://openalex.org/I354108"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongquan Jian","raw_affiliation_strings":["School of Computer and Data Science, Minjiang University, Fuzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Data Science, Minjiang University, Fuzhou, China","institution_ids":["https://openalex.org/I354108"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749645","display_name":"Wanru Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanru Zhao","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129668273","display_name":"Haonan Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Ma","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129734697","display_name":"Meihong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meihong Wang","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129681990","display_name":"Qingqiang Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]},{"id":"https://openalex.org/I75867142","display_name":"Xiamen University of Technology","ror":"https://ror.org/01285e189","country_code":"CN","type":"education","lineage":["https://openalex.org/I75867142"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingqiang Wu","raw_affiliation_strings":["School of Film, Xiamen University, Xiamen, China\nSchool of Informatics, Xiamen University, Xiamen, China\nKey Laboratory of Digital Protection and Intelligent Processing of Intangible Cultural Heritage of Fujian and Taiwan, Ministry of Culture and Tourism, Xiamen University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Film, Xiamen University, Xiamen, China\nSchool of Informatics, Xiamen University, Xiamen, China\nKey Laboratory of Digital Protection and Intelligent Processing of Intangible Cultural Heritage of Fujian and Taiwan, Ministry of Culture and Tourism, Xiamen University, China","institution_ids":["https://openalex.org/I191208505","https://openalex.org/I75867142"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129714538"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36907611,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"29","first_page":"24709","last_page":"24717"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3513000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3513000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12240000069141388,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.06480000168085098,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5943999886512756},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5354999899864197},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.532800018787384},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.48579999804496765},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4302000105381012},{"id":"https://openalex.org/keywords/non-monotonic-logic","display_name":"Non-monotonic logic","score":0.41760000586509705},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.413100004196167},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.4036000072956085}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6621999740600586},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6486999988555908},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5943999886512756},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5354999899864197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5329999923706055},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.48579999804496765},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.4036000072956085},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.34529998898506165},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3328000009059906},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i29.39656","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39656","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39656/43617","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i29.39656","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39656","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39656/43617","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.539400577545166,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320325946","display_name":"Minjiang University","ror":"https://ror.org/00s7tkw17"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138197918.pdf","grobid_xml":"https://content.openalex.org/works/W7138197918.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"research":[1],"reveals":[2],"that":[3,32,102],"a":[4,28,38,43,60,84,95,143],"minority":[5],"of":[6,14,45,66,88],"high-entropy":[7],"tokens":[8],"significantly":[9],"influence":[10],"the":[11],"reasoning":[12,35,77,115,139],"quality":[13],"large":[15],"language":[16],"models":[17,33],"(LLMs).":[18],"Inspired":[19],"by":[20,53],"this,":[21],"we":[22],"propose":[23],"Prototype":[24],"Entropy":[25],"Alignment":[26],"(PEA),":[27],"reinforcement":[29],"learning":[30],"framework":[31],"effective":[34],"not":[36],"as":[37,42,130],"single":[39],"path":[40],"but":[41],"collection":[44],"learnable":[46],"\"entropy":[47],"signatures.\"":[48],"PEA":[49,93,107,141],"identifies":[50],"these":[51,80],"signatures":[52],"clustering":[54],"expert":[55],"trajectories'":[56],"uncertainty":[57],"patterns":[58],"into":[59],"diverse":[61,136],"and":[62,113,137],"continuously":[63],"updated":[64],"set":[65],"prototypes.":[67],"The":[68],"model":[69],"is":[70,105],"then":[71],"rewarded":[72],"for":[73],"aligning":[74],"its":[75],"own":[76],"process":[78],"with":[79,120,135],"evolving":[81,138],"targets,":[82],"creating":[83],"self-improvement":[85],"loop.":[86],"Instead":[87],"replacing":[89],"traditional":[90],"outcome-based":[91],"rewards,":[92,122],"provides":[94],"complementary,":[96],"process-oriented":[97],"signal.":[98],"Our":[99],"experiments":[100],"show":[101],"this":[103],"synergy":[104],"crucial:":[106],"substantially":[108],"boosts":[109],"performance":[110],"on":[111,126],"creative":[112],"general":[114],"tasks":[116,128],"and,":[117],"when":[118],"combined":[119],"outcome":[121],"achieves":[123],"SOTA":[124],"results":[125],"structured":[127],"such":[129],"mathematics.":[131],"By":[132],"rewarding":[133],"alignment":[134],"structures,":[140],"offers":[142],"robust,":[144],"verifier-free":[145],"pathway":[146],"to":[147],"enhance":[148],"reasoning's":[149],"adaptability.":[150]},"counts_by_year":[],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2026-03-18T00:00:00"}
