{"id":"https://openalex.org/W2950940239","doi":"https://doi.org/10.18653/v1/p19-1286","title":"Domain Adaptation of Neural Machine Translation by Lexicon Induction","display_name":"Domain Adaptation of Neural Machine Translation by Lexicon Induction","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2950940239","doi":"https://doi.org/10.18653/v1/p19-1286","mag":"2950940239"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1286","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1286","pdf_url":"https://www.aclweb.org/anthology/P19-1286.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1286.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085263462","display_name":"Junjie Hu","orcid":"https://orcid.org/0000-0002-1911-4361"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I4210107233","display_name":"Language Science (South Korea)","ror":"https://ror.org/01h9v1373","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210107233"]}],"countries":["KR","US"],"is_corresponding":true,"raw_author_name":"Junjie Hu","raw_affiliation_strings":["Language Technologies Institute School of Computer Science Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute School of Computer Science Carnegie Mellon University","institution_ids":["https://openalex.org/I4210107233","https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003465314","display_name":"Mengzhou Xia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107233","display_name":"Language Science (South Korea)","ror":"https://ror.org/01h9v1373","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210107233"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Mengzhou Xia","raw_affiliation_strings":["Language Technologies Institute School of Computer Science Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute School of Computer Science Carnegie Mellon University","institution_ids":["https://openalex.org/I4210107233","https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068811427","display_name":"Graham Neubig","orcid":"https://orcid.org/0000-0002-2072-3789"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I4210107233","display_name":"Language Science (South Korea)","ror":"https://ror.org/01h9v1373","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210107233"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Graham Neubig","raw_affiliation_strings":["Language Technologies Institute School of Computer Science Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute School of Computer Science Carnegie Mellon University","institution_ids":["https://openalex.org/I4210107233","https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109861718","display_name":"Jaime Carbonell","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I4210107233","display_name":"Language Science (South Korea)","ror":"https://ror.org/01h9v1373","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210107233"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Jaime Carbonell","raw_affiliation_strings":["Language Technologies Institute School of Computer Science Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute School of Computer Science Carnegie Mellon University","institution_ids":["https://openalex.org/I4210107233","https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5085263462"],"corresponding_institution_ids":["https://openalex.org/I4210107233","https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":6.5028,"has_fulltext":true,"cited_by_count":58,"citation_normalized_percentile":{"value":0.97251236,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2989","last_page":"3001"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8555065393447876},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.7744090557098389},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7575442790985107},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7439868450164795},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7162294387817383},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.7105215787887573},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6638383865356445},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5825427770614624},{"id":"https://openalex.org/keywords/bleu","display_name":"BLEU","score":0.5690955519676208},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.568474531173706},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5208514332771301},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.506693422794342},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.47871923446655273},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41787129640579224},{"id":"https://openalex.org/keywords/transfer-based-machine-translation","display_name":"Transfer-based machine translation","score":0.4107722043991089},{"id":"https://openalex.org/keywords/example-based-machine-translation","display_name":"Example-based machine translation","score":0.31778281927108765},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11105954647064209},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.05470886826515198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8555065393447876},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.7744090557098389},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7575442790985107},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7439868450164795},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7162294387817383},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.7105215787887573},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6638383865356445},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5825427770614624},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.5690955519676208},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.568474531173706},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5208514332771301},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.506693422794342},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.47871923446655273},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41787129640579224},{"id":"https://openalex.org/C130597682","wikidata":"https://www.wikidata.org/wiki/Q6961922","display_name":"Transfer-based machine translation","level":4,"score":0.4107722043991089},{"id":"https://openalex.org/C24687705","wikidata":"https://www.wikidata.org/wiki/Q3753284","display_name":"Example-based machine translation","level":3,"score":0.31778281927108765},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11105954647064209},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.05470886826515198},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/p19-1286","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1286","pdf_url":"https://www.aclweb.org/anthology/P19-1286.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1286","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1286","pdf_url":"https://www.aclweb.org/anthology/P19-1286.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6000000238418579,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3429874898","display_name":null,"funder_award_id":"LORELEI","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332815","display_name":"Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2950940239.pdf","grobid_xml":"https://content.openalex.org/works/W2950940239.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1915251500","https://openalex.org/W2101456909","https://openalex.org/W2133564696","https://openalex.org/W2136156618","https://openalex.org/W2156985047","https://openalex.org/W2172268343","https://openalex.org/W2251335508","https://openalex.org/W2294774419","https://openalex.org/W2493916176","https://openalex.org/W2555428947","https://openalex.org/W2561274697","https://openalex.org/W2567571499","https://openalex.org/W2740743644","https://openalex.org/W2753639998","https://openalex.org/W2756566411","https://openalex.org/W2758310181","https://openalex.org/W2759461255","https://openalex.org/W2760452458","https://openalex.org/W2798931235","https://openalex.org/W2805394970","https://openalex.org/W2885616807","https://openalex.org/W2890244613","https://openalex.org/W2903193068","https://openalex.org/W2962708992","https://openalex.org/W2962784628","https://openalex.org/W2962824887","https://openalex.org/W2963118869","https://openalex.org/W2963122608","https://openalex.org/W2963212250","https://openalex.org/W2963216553","https://openalex.org/W2963403868","https://openalex.org/W2963506925","https://openalex.org/W2963897095","https://openalex.org/W2964013027","https://openalex.org/W2964308564","https://openalex.org/W3098341425","https://openalex.org/W3204406378","https://openalex.org/W4298393544","https://openalex.org/W4299579390","https://openalex.org/W4307459710","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W193726211","https://openalex.org/W2399355078","https://openalex.org/W2610387714","https://openalex.org/W2167761917","https://openalex.org/W2887289680","https://openalex.org/W2140460368","https://openalex.org/W2947177396","https://openalex.org/W1978161643","https://openalex.org/W4385572994","https://openalex.org/W2771673059"],"abstract_inverted_index":{"It":[0],"has":[1],"been":[2],"previously":[3],"noted":[4],"that":[5,21],"neural":[6],"machine":[7],"translation":[8],"(NMT)":[9],"is":[10,23],"very":[11],"sensitive":[12],"to":[13,78,123,131],"domain":[14],"shift.":[15],"In":[16,98],"this":[17,22,54],"paper,":[18],"we":[19,56,74],"argue":[20],"a":[24,64,70,85],"dual":[25],"effect":[26],"of":[27,32,42,47,93],"the":[28],"highly":[29],"lexicalized":[30],"nature":[31],"NMT,":[33],"resulting":[34],"in":[35],"failure":[36],"for":[37,49],"sentences":[38],"with":[39],"large":[40],"numbers":[41],"unknown":[43],"words,":[44],"and":[45,83,106,129],"lack":[46],"supervision":[48],"domain-specific":[50],"words.":[51],"To":[52],"remedy":[53],"problem,":[55],"propose":[57],"an":[58,80],"unsupervised":[59],"adaptation":[60,104],"method":[61,111],"which":[62],"finetunes":[63],"pre-trained":[65],"out-of-domain":[66],"NMT":[67],"model":[68,108],"using":[69,116],"pseudo-in-domain":[71],"corpus.":[72],"Specifically,":[73],"perform":[75],"lexicon":[76],"induction":[77],"extract":[79],"in-domain":[81,87,95,118],"lexicon,":[82],"construct":[84],"pseudo-parallel":[86],"corpus":[88],"by":[89],"performing":[90],"word-for-word":[91],"back-translation":[92,136],"monolingual":[94],"target":[96],"sentences.":[97],"five":[99],"domains":[100],"over":[101,126,134],"twenty":[102],"pairwise":[103],"settings":[105],"two":[107],"architectures,":[109],"our":[110],"achieves":[112],"consistent":[113],"improvements":[114],"without":[115],"any":[117],"parallel":[119],"sentences,":[120],"improving":[121],"up":[122,130],"14":[124],"BLEU":[125,133],"unadapted":[127],"models,":[128],"2":[132],"strong":[135],"baselines.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
