{"id":"https://openalex.org/W3023622314","doi":"https://doi.org/10.18653/v1/2021.eacl-main.301","title":"Recipes for Adapting Pre-trained Monolingual and Multilingual Models to Machine Translation","display_name":"Recipes for Adapting Pre-trained Monolingual and Multilingual Models to Machine Translation","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3023622314","doi":"https://doi.org/10.18653/v1/2021.eacl-main.301","mag":"3023622314"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2021.eacl-main.301","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.301","pdf_url":"https://aclanthology.org/2021.eacl-main.301.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2021.eacl-main.301.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024678824","display_name":"Asa Cooper Stickland","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["GB","IL"],"is_corresponding":true,"raw_author_name":"Asa Cooper Stickland","raw_affiliation_strings":["University of Edinburgh,  Facebook AI","University of Edinburgh,"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,  Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"University of Edinburgh,","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061256224","display_name":"Xian Li","orcid":"https://orcid.org/0000-0002-1509-9328"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Xian Li","raw_affiliation_strings":["University of Edinburgh,  Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,  Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011974509","display_name":"Marjan Ghazvininejad","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Marjan Ghazvininejad","raw_affiliation_strings":["University of Edinburgh,  Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,  Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5024678824"],"corresponding_institution_ids":["https://openalex.org/I2252078561","https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":0.5599,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.71504002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.820426344871521},{"id":"https://openalex.org/keywords/bleu","display_name":"BLEU","score":0.7863255739212036},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7189961075782776},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.718422532081604},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.6221888065338135},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6183499693870544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6136139631271362},{"id":"https://openalex.org/keywords/nepali","display_name":"Nepali","score":0.6000006794929504},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5831440091133118},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4721662700176239},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4641163945198059},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44976645708084106},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44536134600639343},{"id":"https://openalex.org/keywords/czech","display_name":"Czech","score":0.44305887818336487},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1635110080242157}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.820426344871521},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.7863255739212036},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7189961075782776},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.718422532081604},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.6221888065338135},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6183499693870544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6136139631271362},{"id":"https://openalex.org/C2780068402","wikidata":"https://www.wikidata.org/wiki/Q33823","display_name":"Nepali","level":2,"score":0.6000006794929504},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5831440091133118},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4721662700176239},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4641163945198059},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44976645708084106},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44536134600639343},{"id":"https://openalex.org/C2777842544","wikidata":"https://www.wikidata.org/wiki/Q9056","display_name":"Czech","level":2,"score":0.44305887818336487},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1635110080242157},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/2021.eacl-main.301","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.301","pdf_url":"https://aclanthology.org/2021.eacl-main.301.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2004.14911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2004.14911","pdf_url":"https://arxiv.org/pdf/2004.14911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3023622314","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2004.14911.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2004.14911","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2004.14911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18653/v1/2021.eacl-main.301","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.eacl-main.301","pdf_url":"https://aclanthology.org/2021.eacl-main.301.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8199999928474426}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3023622314.pdf","grobid_xml":"https://content.openalex.org/works/W3023622314.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W222053410","https://openalex.org/W630532510","https://openalex.org/W2130942839","https://openalex.org/W2170716095","https://openalex.org/W2550821151","https://openalex.org/W2555428947","https://openalex.org/W2741838462","https://openalex.org/W2887920589","https://openalex.org/W2911300548","https://openalex.org/W2919290281","https://openalex.org/W2933138175","https://openalex.org/W2944815030","https://openalex.org/W2945260553","https://openalex.org/W2949920209","https://openalex.org/W2962739339","https://openalex.org/W2962784628","https://openalex.org/W2963088995","https://openalex.org/W2963247703","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963506925","https://openalex.org/W2963532001","https://openalex.org/W2963970792","https://openalex.org/W2963983698","https://openalex.org/W2963993537","https://openalex.org/W2964121744","https://openalex.org/W2964303773","https://openalex.org/W2964308564","https://openalex.org/W2965373594","https://openalex.org/W2967985939","https://openalex.org/W2970352191","https://openalex.org/W2970597249","https://openalex.org/W2970925270","https://openalex.org/W2970925677","https://openalex.org/W2981852735","https://openalex.org/W2982399380","https://openalex.org/W3006381853","https://openalex.org/W3035390927","https://openalex.org/W3107826490"],"related_works":["https://openalex.org/W3153805297","https://openalex.org/W3199365283","https://openalex.org/W3160233719","https://openalex.org/W2284660317","https://openalex.org/W3002602684","https://openalex.org/W3161525130","https://openalex.org/W3200578235","https://openalex.org/W2996854111","https://openalex.org/W2122270629","https://openalex.org/W1915251500","https://openalex.org/W3179401712","https://openalex.org/W3091540052","https://openalex.org/W2251071050","https://openalex.org/W2963174344","https://openalex.org/W3093189471","https://openalex.org/W3122836184","https://openalex.org/W2951521088","https://openalex.org/W3002887707","https://openalex.org/W2798931235","https://openalex.org/W2988121211"],"abstract_inverted_index":{"There":[0],"has":[1],"been":[2],"recent":[3],"success":[4],"in":[5],"pre-training":[6],"on":[7,12,51,55,62,72],"monolingual":[8,64,73],"data":[9,74],"and":[10,37,42,93,116],"fine-tuning":[11,47,108,150],"Machine":[13],"Translation":[14],"(MT),":[15],"but":[16],"it":[17],"remains":[18],"unclear":[19],"how":[20],"to":[21,129,134,141],"best":[22,84],"leverage":[23],"a":[24,28,48,58,69],"pre-trained":[25,49],"model":[26,50,59,70,91],"for":[27,109,139],"given":[29],"MT":[30],"task.":[31],"This":[32],"paper":[33],"investigates":[34],"the":[35,83,90,104,114,119,145,149],"benefits":[36],"drawbacks":[38],"of":[39,89,106,118],"freezing":[40,87],"parameters,":[41,92],"adding":[43,94],"new":[44],"ones,":[45],"when":[46],"MT.":[52],"We":[53],"focus":[54],"1)":[56],"Fine-tuning":[57,68],"trained":[60,71],"only":[61],"English":[63,142],"data,":[65],"BART.":[66],"2)":[67],"from":[75],"25":[76],"languages,":[77],"mBART.":[78],"For":[79,98],"BART":[80],"we":[81,100,143],"get":[82],"performance":[85,105],"by":[86],"most":[88,110,117,127],"extra":[95],"positional":[96],"embeddings.":[97],"mBART":[99],"match":[101],"or":[102],"outperform":[103],"naive":[107],"language":[111],"pairs":[112],"with":[113],"encoder,":[115],"decoder,":[120],"frozen.":[121],"The":[122],"encoder-decoder":[123],"attention":[124],"parameters":[125],"are":[126],"important":[128],"finetune.":[130],"When":[131],"constraining":[132],"ourselves":[133],"an":[135],"outof-domain":[136],"training":[137],"set":[138],"Vietnamese":[140],"see":[144],"largest":[146],"improvements":[147],"over":[148],"baseline.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
