{"id":"https://openalex.org/W7155055770","doi":"https://doi.org/10.48550/arxiv.2604.16881","title":"Incentivizing Parametric Knowledge via Reinforcement Learning with Verifiable Rewards for Cross-Cultural Entity Translation","display_name":"Incentivizing Parametric Knowledge via Reinforcement Learning with Verifiable Rewards for Cross-Cultural Entity Translation","publication_year":2026,"publication_date":"2026-04-18","ids":{"openalex":"https://openalex.org/W7155055770","doi":"https://doi.org/10.48550/arxiv.2604.16881"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16881","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134186392","display_name":"Jiang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhou, Jiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134145453","display_name":"Xiaohu Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Xiaohu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134207266","display_name":"Xinwei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Xinwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134173667","display_name":"Tianyu Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Tianyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100446147","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0002-6567-4503"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134165365","display_name":"Yangyang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yangyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134175993","display_name":"Heng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Heng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134114209","display_name":"Linlong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Linlong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134141966","display_name":"Longyue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Longyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134166382","display_name":"Weihua Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Weihua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134137762","display_name":"Deyi Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Deyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5134186392"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.26930001378059387,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.26930001378059387,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.2563999891281128,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0957999974489212,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.699999988079071},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6858999729156494},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5774999856948853},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5758000016212463},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5365999937057495},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.48559999465942383},{"id":"https://openalex.org/keywords/literal","display_name":"Literal (mathematical logic)","score":0.40290001034736633},{"id":"https://openalex.org/keywords/literal-translation","display_name":"Literal translation","score":0.3677000105381012}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7699000239372253},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.699999988079071},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6858999729156494},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.619700014591217},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5774999856948853},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5758000016212463},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5365999937057495},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4848000109195709},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4650999903678894},{"id":"https://openalex.org/C2780882242","wikidata":"https://www.wikidata.org/wiki/Q14235582","display_name":"Literal (mathematical logic)","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C2777761643","wikidata":"https://www.wikidata.org/wiki/Q1191837","display_name":"Literal translation","level":3,"score":0.3677000105381012},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2808000147342682},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7158101797103882,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cross-cultural":[0],"entity":[1,61,114,128,146],"translation":[2,62,115,129,147],"remains":[3],"challenging":[4],"for":[5],"large":[6],"language":[7],"models":[8],"(LLMs)":[9],"as":[10],"literal":[11],"or":[12],"phonetic":[13],"renderings":[14],"are":[15],"usually":[16],"yielded":[17],"instead":[18],"of":[19,43,168],"culturally":[20],"appropriate":[21],"translations":[22],"in":[23,32,112],"context.":[24],"However,":[25],"relevant":[26],"knowledge":[27,67],"may":[28],"already":[29],"be":[30],"encoded":[31],"model":[33,90],"parameters":[34],"during":[35],"large-scale":[36],"pre-training.":[37],"To":[38],"incentivize":[39],"the":[40,89],"effective":[41],"use":[42],"parametric":[44],"knowledge,":[45],"we":[46],"propose":[47],"EA-RLVR":[48,69,105],"(Entity-Anchored":[49],"Reinforcement":[50],"Learning":[51],"with":[52,163],"Verifiable":[53],"Rewards),":[54],"a":[55,73,93,136,182],"training":[56,121],"framework":[57],"that":[58],"optimizes":[59],"cross-cultural":[60],"without":[63],"relying":[64],"on":[65,72,106,122,135,157],"external":[66],"bases.":[68],"anchors":[70],"supervision":[71],"verifiable,":[74],"entity-level":[75],"reward":[76,172],"signal":[77],"and":[78,108,117,171,181],"incorporates":[79],"lightweight":[80],"structural":[81],"gates":[82],"to":[83,133,151,161,177],"stabilize":[84],"optimization.":[85,165],"This":[86],"design":[87],"steers":[88],"toward":[91],"learning":[92],"robust":[94],"reasoning":[95],"process":[96],"rather":[97],"than":[98],"merely":[99,123],"imitating":[100],"reference":[101],"translations.":[102],"We":[103],"evaluate":[104],"XC-Translate":[107],"observe":[109],"consistent":[110],"improvements":[111],"both":[113],"accuracy":[116,130],"out-of-domain":[118],"generalization.":[119],"Specifically,":[120],"7k":[124],"samples":[125],"boosts":[126],"Qwen3-14B's":[127],"from":[131],"23.66\\%":[132],"31.87\\%":[134],"50k":[137],"test":[138],"set":[139],"comprising":[140],"entirely":[141],"unseen":[142],"entities.":[143],"The":[144],"learned":[145],"ability":[148],"also":[149],"transfers":[150],"general":[152],"translation,":[153],"yielding":[154],"+1.35":[155],"XCOMET":[156],"WMT24++,":[158],"which":[159],"scales":[160],"+1.59":[162],"extended":[164],"Extensive":[166],"analyses":[167],"$pass@k$":[169],"dynamics":[170],"formulations":[173],"attribute":[174],"these":[175],"gains":[176],"superior":[178],"sampling":[179],"efficiency":[180],"stable":[183],"optimization":[184],"landscape.":[185]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-22T00:00:00"}
