{"id":"https://openalex.org/W7159029830","doi":"https://doi.org/10.48550/arxiv.2604.26553","title":"TLPO: Token-Level Policy Optimization for Mitigating Language Confusion in Large Language Models","display_name":"TLPO: Token-Level Policy Optimization for Mitigating Language Confusion in Large Language Models","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7159029830","doi":"https://doi.org/10.48550/arxiv.2604.26553"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.26553","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26553","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.26553","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134881012","display_name":"Jinho Choo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Choo, Jinho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134894773","display_name":"JunSeung Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, JunSeung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037195485","display_name":"Jimyeong Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jimyeong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006136942","display_name":"Yeeho Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Yeeho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127984624","display_name":"S. K. Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, S. K.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134880502","display_name":"Yeong-Dae Kwon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwon, Yeong-Dae","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5134881012"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36480000615119934,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36480000615119934,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08829999715089798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.08380000293254852,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.7023000121116638},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.660099983215332},{"id":"https://openalex.org/keywords/unintended-consequences","display_name":"Unintended consequences","score":0.5967000126838684},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5346999764442444},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5195000171661377},{"id":"https://openalex.org/keywords/intervention","display_name":"Intervention (counseling)","score":0.44850000739097595},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.32820001244544983}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7263000011444092},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.7023000121116638},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.660099983215332},{"id":"https://openalex.org/C2776889888","wikidata":"https://www.wikidata.org/wiki/Q1135789","display_name":"Unintended consequences","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5346999764442444},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5195000171661377},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.44850000739097595},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3716000020503998},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3472999930381775},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3411000072956085},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.32820001244544983},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2912999987602234},{"id":"https://openalex.org/C2779313563","wikidata":"https://www.wikidata.org/wiki/Q17072565","display_name":"On Language","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.25110000371932983},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.26553","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26553","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.26553","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26553","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4782458245754242}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,24,78,115,139],"models":[2],"(LLMs)":[3],"demonstrate":[4,131],"strong":[5],"multilingual":[6,126],"capabilities,":[7,55],"yet":[8],"often":[9],"fail":[10],"to":[11,49,76,100],"consistently":[12],"generate":[13],"responses":[14,45],"in":[15,137],"the":[16,41,57,94,119],"intended":[17],"language,":[18],"exhibiting":[19],"a":[20,72,97,105],"phenomenon":[21],"known":[22],"as":[23,34],"confusion.":[25],"Prior":[26],"mitigation":[27,113],"approaches":[28],"based":[29],"on":[30,124],"sequence-level":[31],"fine-tuning,":[32],"such":[33],"DPO,":[35],"ORPO,":[36],"and":[37,46,92],"GRPO,":[38],"operate":[39],"at":[40,104],"level":[42],"of":[43,52,114],"entire":[44],"can":[47],"lead":[48],"unintended":[50],"degradation":[51],"general":[53,121],"model":[54],"motivating":[56],"need":[58],"for":[59],"more":[60],"fine-grained":[61],"alternatives.":[62],"To":[63],"address":[64],"this,":[65],"we":[66],"introduce":[67],"Token-Level":[68],"Policy":[69],"Optimization":[70],"(TLPO),":[71],"fine-tuning":[73],"framework":[74],"designed":[75],"mitigate":[77],"confusion":[79,116],"through":[80],"localized,":[81],"token-level":[82],"updates.":[83],"TLPO":[84,133],"identifies":[85],"error-prone":[86],"positions,":[87],"explores":[88],"alternative":[89],"candidate":[90],"tokens,":[91],"updates":[93],"policy":[95],"using":[96],"tailored":[98],"objective":[99],"suppress":[101],"error-inducing":[102],"outputs":[103],"granular":[106],"level.":[107],"This":[108],"selective":[109],"intervention":[110],"enables":[111],"effective":[112],"without":[117],"compromising":[118],"model's":[120],"abilities.":[122],"Experiments":[123],"multiple":[125],"LLMs":[127],"across":[128],"diverse":[129],"languages":[130],"that":[132],"significantly":[134],"outperforms":[135],"baselines":[136],"improving":[138],"consistency":[140],"while":[141],"preserving":[142],"downstream":[143],"task":[144],"accuracy.":[145]},"counts_by_year":[],"updated_date":"2026-05-01T06:10:29.291645","created_date":"2026-05-01T00:00:00"}
