{"id":"https://openalex.org/W7161003034","doi":"https://doi.org/10.48550/arxiv.2605.12112","title":"When Policy Entropy Constraint Fails: Preserving Diversity in Flow-based RLHF via Perceptual Entropy","display_name":"When Policy Entropy Constraint Fails: Preserving Diversity in Flow-based RLHF via Perceptual Entropy","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161003034","doi":"https://doi.org/10.48550/arxiv.2605.12112"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12112","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12112","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12112","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136025648","display_name":"Xiaofeng Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Xiaofeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136003367","display_name":"Jun Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136013322","display_name":"Bin-Bin Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Bin-Bin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101323076","display_name":"Yuanting Fan","orcid":"https://orcid.org/0009-0008-6507-666X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Yuanting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136038627","display_name":"Xi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136027286","display_name":"Chengjie Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chengjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136033216","display_name":"Hongsong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hongsong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136072908","display_name":"Feng Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6985999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6985999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1712999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.014999999664723873,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6484000086784363},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.6179999709129333},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.4449999928474426},{"id":"https://openalex.org/keywords/conditional-entropy","display_name":"Conditional entropy","score":0.4041999876499176},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.3621000051498413},{"id":"https://openalex.org/keywords/joint-entropy","display_name":"Joint entropy","score":0.30070000886917114}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6484000086784363},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.6179999709129333},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4668000042438507},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4526999890804291},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.4449999928474426},{"id":"https://openalex.org/C101721835","wikidata":"https://www.wikidata.org/wiki/Q813908","display_name":"Conditional entropy","level":3,"score":0.4041999876499176},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4004000127315521},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C106752470","wikidata":"https://www.wikidata.org/wiki/Q1364826","display_name":"Joint entropy","level":3,"score":0.30070000886917114},{"id":"https://openalex.org/C125252325","wikidata":"https://www.wikidata.org/wiki/Q1345213","display_name":"Entropy rate","level":4,"score":0.3001999855041504},{"id":"https://openalex.org/C24495805","wikidata":"https://www.wikidata.org/wiki/Q7269091","display_name":"Quantum relative entropy","level":5,"score":0.2955000102519989},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2870999872684479},{"id":"https://openalex.org/C28063669","wikidata":"https://www.wikidata.org/wiki/Q7167042","display_name":"Perceptual system","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.26759999990463257},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2637999951839447},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.26010000705718994},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12112","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12112","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12112","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12112","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"RLHF":[0],"is":[1,24,75,202],"widely":[2],"used":[3],"to":[4,15,27,90,96,144],"align":[5],"flow-matching":[6],"text-to-image":[7],"models":[8],"with":[9,29],"human":[10],"preferences,":[11],"but":[12],"often":[13,25],"leads":[14],"severe":[16],"diversity":[17,23,51,73,114,147,191],"collapse":[18,74],"after":[19],"fine-tuning.":[20],"In":[21],"RL,":[22],"assumed":[26],"correlate":[28],"policy":[30,44,82,87],"entropy,":[31],"motivating":[32],"entropy":[33,45,63,88,111],"regularization.":[34],"However,":[35],"we":[36,108,130],"show":[37],"this":[38,55,106,128],"intuition":[39],"breaks":[40],"in":[41,101,115,168],"flow":[42],"models:":[43],"remains":[46],"constant,":[47],"even":[48],"while":[49,71],"perceptual":[50,103,110,117,146,163],"collapses.":[52],"We":[53],"explain":[54],"mismatch":[56],"both":[57],"theoretically":[58],"and":[59,119,138,148,158,161],"empirically:":[60],"the":[61,66,72,78,92,102,121,150,169,174],"constant":[62],"arises":[64],"from":[65,94],"fixed,":[67],"pre-defined":[68],"noise":[69],"schedule,":[70],"driven":[76],"by":[77],"mode-seeking":[79],"nature":[80],"of":[81,123,178,186,193],"gradients.":[83],"As":[84],"a":[85,97,116,183,190],"result,":[86],"fails":[89],"prevent":[91],"model":[93],"converging":[95],"narrow":[98],"high-reward":[99],"region":[100],"space.":[104],"To":[105],"end,":[107],"introduce":[109],"that":[112],"captures":[113],"space":[118],"maintains":[120],"property":[122],"standard":[124],"entropy.":[125],"Building":[126],"upon":[127],"insight,":[129],"propose":[131],"two":[132,154],"entropy-regularized":[133],"strategies,":[134],"Perceptual":[135,139],"Entropy":[136],"Constraint":[137],"Constraints":[140],"on":[141],"Generation":[142],"Space,":[143],"preserve":[145],"improve":[149],"quality.":[151],"Experiments":[152],"across":[153],"base":[155],"models,":[156],"neural":[157],"rule-based":[159],"rewards,":[160],"three":[162],"spaces":[164],"demonstrate":[165],"consistent":[166],"gains":[167],"quality-diversity":[170],"trade-off;":[171],"PEC":[172,187],"achieves":[173],"best":[175],"overall":[176],"score":[177],"0.734":[179],"(vs.":[180,195],"baseline's":[181,196],"0.366);":[182],"complementary":[184],"setting":[185],"further":[188],"reaches":[189],"average":[192],"0.989":[194],"0.047).":[197],"Our":[198],"project":[199],"page":[200],"(https://xiaofeng-tan.github.io/projects/PEC)":[201],"publicly":[203],"available.":[204]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-14T00:00:00"}
