{"id":"https://openalex.org/W7162339790","doi":"https://doi.org/10.48550/arxiv.2605.23885","title":"Multilingual Knowledge Transfer under Data Constraints via Lexical Interventions","display_name":"Multilingual Knowledge Transfer under Data Constraints via Lexical Interventions","publication_year":2026,"publication_date":"2026-05-22","ids":{"openalex":"https://openalex.org/W7162339790","doi":"https://doi.org/10.48550/arxiv.2605.23885"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.23885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.23885","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068681171","display_name":"Anastasiia Sedova","orcid":"https://orcid.org/0000-0002-8544-4166"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sedova, Anastasiia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056555815","display_name":"Natalie Schluter","orcid":"https://orcid.org/0000-0001-8215-4801"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schluter, Natalie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059839283","display_name":"Skyler Seto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seto, Skyler","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017591085","display_name":"Maartje ter Hoeve","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"ter Hoeve, Maartje","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6421999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6421999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0869000032544136,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08139999955892563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.5184000134468079},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.48890000581741333},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.4799000024795532},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.45500001311302185},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4546000063419342},{"id":"https://openalex.org/keywords/transfer-of-training","display_name":"Transfer of training","score":0.427700012922287},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4171999990940094},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3587999939918518}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7706000208854675},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6478999853134155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5766000151634216},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.5184000134468079},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.48890000581741333},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.4799000024795532},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.45500001311302185},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C2777938197","wikidata":"https://www.wikidata.org/wiki/Q7834022","display_name":"Transfer of training","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3587999939918518},{"id":"https://openalex.org/C126706616","wikidata":"https://www.wikidata.org/wiki/Q2944660","display_name":"Lexical item","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.23885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.23885","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6693949699401855,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cross-lingual":[0],"knowledge":[1,24,37,48,57,90],"transfer":[2,49,58,91],"is":[3,21],"critical":[4],"for":[5,11,26,53,77,147],"building":[6],"high-performing":[7],"multilingual":[8],"language":[9,19],"models":[10],"languages":[12,154],"with":[13,126,169],"insufficient":[14],"training":[15,71,122,134,176],"data.":[16],"When":[17],"target":[18,167],"data":[20,103],"scarce,":[22],"the":[23,43,119,166],"required":[25],"many":[27,78],"downstream":[28,163],"tasks":[29,164],"involving":[30],"scientific":[31],"reasoning,":[32],"commonsense":[33],"inference,":[34],"and":[35,135],"world":[36],"must":[38],"be":[39,142],"acquired":[40],"primarily":[41],"from":[42],"high-resource":[44,99,120],"language,":[45,168],"making":[46],"effective":[47],"essential.":[50],"Existing":[51],"methods":[52],"improving":[54],"such":[55],"cross-lingual":[56],"require":[59],"large":[60],"amounts":[61],"of":[62,101,118],"parallel":[63],"data,":[64],"translation":[65],"systems,":[66],"auxiliary":[67],"models,":[68],"or":[69],"additional":[70,132],"stages":[72],"that":[73,88],"are":[74,124],"largely":[75],"unavailable":[76],"languages.":[79],"We":[80],"propose":[81],"LINK":[82],"-":[83],"a":[84,108,116,137,172],"data-level":[85],"intervention":[86],"method":[87],"improves":[89],"during":[92],"model":[93,133,157],"pretraining":[94,102],"through":[95],"lexical":[96],"substitutions":[97],"in":[98,115,165,175],"part":[100],"using":[104],"bilingual":[105,138],"vocabularies.":[106],"For":[107],"given":[109],"replacement":[110],"ratio,":[111],"randomly":[112],"selected":[113],"words":[114],"portion":[117],"(English)":[121],"corpus":[123],"swapped":[125],"their":[127],"word-level":[128],"translations,":[129],"requiring":[130],"no":[131],"only":[136],"vocabulary,":[139],"which":[140],"can":[141],"obtained":[143],"at":[144],"near-zero":[145],"cost":[146],"virtually":[148],"any":[149],"language.":[150],"Evaluation":[151],"on":[152,162],"eight":[153],"across":[155],"five":[156],"sizes":[158],"shows":[159],"notable":[160],"improvements":[161],"up":[170],"to":[171,177],"2x":[173],"speedup":[174],"reach":[178],"equivalent":[179],"performance.":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-26T00:00:00"}
