{"id":"https://openalex.org/W7139933398","doi":"https://doi.org/10.48550/arxiv.2603.19152","title":"VEPO: Variable Entropy Policy Optimization for Low-Resource Language Foundation Models","display_name":"VEPO: Variable Entropy Policy Optimization for Low-Resource Language Foundation Models","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139933398","doi":"https://doi.org/10.48550/arxiv.2603.19152"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19152","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024924372","display_name":"Chonghan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Chonghan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075264825","display_name":"Yimin Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Yimin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130244475","display_name":"Qi An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130248054","display_name":"Xin He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130241357","display_name":"Cunqi Zhai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhai, Cunqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130225348","display_name":"Fei Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130229339","display_name":"Weijia Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Weijia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Gong, Xiaochun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Xiaochun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113744967","display_name":"Yongchao Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yongchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123493679","display_name":"Shousheng Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Shousheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130238807","display_name":"Xiangzheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiangzheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5024924372"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5819000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5819000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.062199998646974564,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.032600000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.478300005197525},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.38850000500679016},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.38339999318122864},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.3806999921798706},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.37619999051094055},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.35910001397132874},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.34950000047683716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6682000160217285},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.478300005197525},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4212000072002411},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3901999890804291},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.38850000500679016},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.38339999318122864},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3806999921798706},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.32760000228881836},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C122770356","wikidata":"https://www.wikidata.org/wiki/Q1656753","display_name":"Identifiability","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.30390000343322754},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"frequently":[3],"exhibit":[4],"suboptimal":[5],"performance":[6,137],"on":[7],"low":[8],"resource":[9],"languages,":[10],"primarily":[11],"due":[12],"to":[13,39,68,80],"inefficient":[14],"subword":[15],"segmentation":[16],"and":[17,58,88,132],"systemic":[18],"training":[19],"data":[20],"imbalances.":[21],"In":[22],"this":[23],"paper,":[24],"we":[25],"propose":[26],"Variable":[27],"Entropy":[28],"Policy":[29],"Optimization":[30],"(VEPO),":[31],"which":[32],"leverages":[33],"Reinforcement":[34],"Learning":[35],"with":[36,103],"Verifiable":[37],"Rewards":[38],"incorporate":[40],"deterministic":[41],"structural":[42],"constraints":[43],"into":[44],"the":[45,78,83,93,136],"policy":[46,112],"alignment":[47],"process.":[48],"This":[49],"framework":[50],"ensures":[51],"prescribed":[52],"sequence":[53],"length,":[54],"robust":[55,108],"format":[56],"consistency,":[57],"rigorous":[59],"linguistic":[60],"well":[61],"formedness,":[62],"all":[63],"enforced":[64],"during":[65],"training.":[66],"Central":[67],"our":[69],"approach":[70],"is":[71],"a":[72],"variable":[73],"entropy":[74,99],"mechanism":[75],"that":[76,123],"enables":[77],"model":[79],"dynamically":[81],"calibrate":[82],"equilibrium":[84],"between":[85],"literal":[86],"fidelity":[87],"semantic":[89],"naturalness":[90],"by":[91],"modulating":[92],"exploration":[94,109],"exploitation":[95],"manifold.":[96],"By":[97],"integrating":[98],"tempered":[100],"advantage":[101],"estimation":[102],"asymmetric":[104],"clipping,":[105],"VEPO":[106,124],"sustains":[107],"while":[110],"mitigating":[111],"collapse.":[113],"Empirical":[114],"evaluations":[115],"across":[116],"90":[117],"FLORES-200,":[118],"COMET-22,":[119],"chrF":[120],"directions":[121],"demonstrate":[122],"yields":[125],"substantial":[126],"improvements":[127],"in":[128],"both":[129],"tokenization":[130],"efficiency":[131],"translation":[133],"quality,":[134],"bridging":[135],"gap":[138],"for":[139],"underrepresented":[140],"languages.":[141]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-03-21T00:00:00"}
