{"id":"https://openalex.org/W4388691873","doi":"https://doi.org/10.1162/coli_a_00496","title":"Rethinking the Exploitation of Monolingual Data for Low-Resource Neural Machine Translation","display_name":"Rethinking the Exploitation of Monolingual Data for Low-Resource Neural Machine Translation","publication_year":2023,"publication_date":"2023-11-15","ids":{"openalex":"https://openalex.org/W4388691873","doi":"https://doi.org/10.1162/coli_a_00496"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00496","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00496","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00496/2177314/coli_a_00496.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00496/2177314/coli_a_00496.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051105922","display_name":"Jianhui Pang","orcid":"https://orcid.org/0000-0001-8093-867X"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Jianhui Pang","raw_affiliation_strings":["NLP2CT Lab, University of Macau. nlp2ct.pangjh3@gmail.com"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. nlp2ct.pangjh3@gmail.com","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028040391","display_name":"Baosong Yang","orcid":"https://orcid.org/0000-0001-5002-2409"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Baosong Yang*","raw_affiliation_strings":["Alibaba Group. yangbaosong.ybs@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. yangbaosong.ybs@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101468579","display_name":"Derek F. Wong","orcid":"https://orcid.org/0000-0002-5307-7322"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Derek Fai Wong*","raw_affiliation_strings":["NLP2CT Lab, University of Macau. derekfw@um.edu.mo"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. derekfw@um.edu.mo","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377278","display_name":"Yu Wan","orcid":"https://orcid.org/0000-0002-7310-7883"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Wan","raw_affiliation_strings":["Alibaba Group. wanyu.wy@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. wanyu.wy@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062188134","display_name":"Dayiheng Liu","orcid":"https://orcid.org/0000-0002-8755-8941"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dayiheng Liu","raw_affiliation_strings":["Alibaba Group. liudayiheng.ldyh@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. liudayiheng.ldyh@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025832925","display_name":"Lidia S. Chao","orcid":"https://orcid.org/0000-0001-6629-170X"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Lidia Sam Chao","raw_affiliation_strings":["NLP2CT Lab, University of Macau. lidiasc@um.edu.mo"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. lidiasc@um.edu.mo","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101791703","display_name":"Jun Xie","orcid":"https://orcid.org/0000-0003-0955-9970"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Xie","raw_affiliation_strings":["Alibaba Group. qingjing.xj@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. qingjing.xj@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5051105922"],"corresponding_institution_ids":["https://openalex.org/I204512498"],"apc_list":null,"apc_paid":null,"fwci":1.5555,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.8667653,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"50","issue":"1","first_page":"25","last_page":"47"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7780529260635376},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7662825584411621},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5638625621795654},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5183166265487671},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4704369902610779},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44919514656066895},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0846533477306366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7780529260635376},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7662825584411621},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5638625621795654},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5183166265487671},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4704369902610779},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44919514656066895},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0846533477306366},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/coli_a_00496","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00496","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00496/2177314/coli_a_00496.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:336f4ccc4e544e05be92c08b7497a48e","is_oa":true,"landing_page_url":"https://doaj.org/article/336f4ccc4e544e05be92c08b7497a48e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 50, Iss 1 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00496","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00496","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00496/2177314/coli_a_00496.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1286881433","display_name":null,"funder_award_id":"FDCT/060/2022/AFJ","funder_id":"https://openalex.org/F4320323893","funder_display_name":"Fundo para o Desenvolvimento das Ci\u00eancias e da Tecnologia"},{"id":"https://openalex.org/G3699111562","display_name":null,"funder_award_id":"MYRG2020-00054-FST","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G6889481450","display_name":null,"funder_award_id":"FDCT/0070/2022/AMJ","funder_id":"https://openalex.org/F4320323893","funder_display_name":"Fundo para o Desenvolvimento das Ci\u00eancias e da Tecnologia"},{"id":"https://openalex.org/G8521488234","display_name":null,"funder_award_id":"0070/2022/A","funder_id":"https://openalex.org/F4320323893","funder_display_name":"Fundo para o Desenvolvimento das Ci\u00eancias e da Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320322841","display_name":"Universidade de Macau","ror":"https://ror.org/01r4q9n85"},{"id":"https://openalex.org/F4320323893","display_name":"Fundo para o Desenvolvimento das Ci\u00eancias e da Tecnologia","ror":"https://ror.org/05vna4324"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388691873.pdf"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1915251500","https://openalex.org/W2018869373","https://openalex.org/W2122270629","https://openalex.org/W2561274697","https://openalex.org/W2591804103","https://openalex.org/W2612690371","https://openalex.org/W2790235966","https://openalex.org/W2888541716","https://openalex.org/W2889326796","https://openalex.org/W2890731353","https://openalex.org/W2945383715","https://openalex.org/W2948947170","https://openalex.org/W2951476960","https://openalex.org/W2951563833","https://openalex.org/W2962801832","https://openalex.org/W2963216553","https://openalex.org/W2963250244","https://openalex.org/W2963341956","https://openalex.org/W2963641307","https://openalex.org/W2963842982","https://openalex.org/W2963993537","https://openalex.org/W2970015022","https://openalex.org/W2970045405","https://openalex.org/W3001434439","https://openalex.org/W3006381853","https://openalex.org/W3017454464","https://openalex.org/W3034999214","https://openalex.org/W3036120435","https://openalex.org/W3104273515","https://openalex.org/W3104881680","https://openalex.org/W3105038888","https://openalex.org/W3155457266","https://openalex.org/W3169483174","https://openalex.org/W3173162544","https://openalex.org/W3199258042","https://openalex.org/W3202201199","https://openalex.org/W4298393544","https://openalex.org/W4322718191","https://openalex.org/W4385571124","https://openalex.org/W6608606290","https://openalex.org/W6631190155","https://openalex.org/W6640059789","https://openalex.org/W6677103787","https://openalex.org/W6685145238","https://openalex.org/W6712499988","https://openalex.org/W6739901393","https://openalex.org/W6745388339","https://openalex.org/W6748452836","https://openalex.org/W6766557392","https://openalex.org/W6771713106","https://openalex.org/W6795914504","https://openalex.org/W6797825753","https://openalex.org/W6850625674"],"related_works":["https://openalex.org/W2775554247","https://openalex.org/W2883671469","https://openalex.org/W2728761353","https://openalex.org/W2110168585","https://openalex.org/W3107474891","https://openalex.org/W2250213760","https://openalex.org/W4386247111","https://openalex.org/W4327642362","https://openalex.org/W2587014613","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Abstract":[0],"The":[1],"utilization":[2],"of":[3,26,48,63,87,90,99,117,125,133,168,171],"monolingual":[4,139,172],"data":[5,140,173],"has":[6],"been":[7],"shown":[8],"to":[9,60],"be":[10,71],"a":[11,84],"promising":[12],"strategy":[13],"for":[14,137,163],"addressing":[15],"low-resource":[16,143,145],"machine":[17,49,64,146,175],"translation":[18,50,65,110,147,157],"problems.":[19],"Previous":[20],"studies":[21],"have":[22],"demonstrated":[23],"the":[24,46,53,61,88,97,115,131,169],"effectiveness":[25],"techniques":[27,92],"such":[28],"as":[29],"back-translation":[30],"and":[31,41,67,109,120,144,166],"self-supervised":[32],"objectives,":[33],"including":[34,102],"masked":[35],"language":[36,39,104],"modeling,":[37,40],"causal":[38],"denoise":[42],"autoencoding,":[43],"in":[44,55,141,155,174],"improving":[45],"performance":[47,153],"models.":[51],"However,":[52],"manner":[54],"which":[56,159],"these":[57,91],"methods":[58],"contribute":[59],"success":[62],"tasks":[66],"how":[68],"they":[69],"can":[70],"effectively":[72],"combined":[73],"remains":[74],"an":[75],"under-researched":[76],"area.":[77],"In":[78],"this":[79],"study,":[80],"we":[81],"carry":[82],"out":[83],"systematic":[85],"investigation":[86],"effects":[89],"on":[93,123],"linguistic":[94],"properties":[95],"through":[96],"use":[98],"probing":[100],"tasks,":[101],"source":[103],"comprehension,":[105],"bilingual":[106],"word":[107],"alignment,":[108],"fluency.":[111],"We":[112],"further":[113,161],"evaluate":[114],"impact":[116],"pre-training,":[118],"back-translation,":[119],"multi-task":[121],"learning":[122],"bitexts":[124],"varying":[126],"sizes.":[127],"Our":[128],"findings":[129],"inform":[130],"design":[132],"more":[134],"effective":[135],"pipelines":[136],"leveraging":[138],"extremely":[142],"tasks.":[148],"Experiment":[149],"results":[150],"show":[151],"consistent":[152],"gains":[154],"seven":[156],"directions,":[158],"provide":[160],"support":[162],"our":[164],"conclusions":[165],"understanding":[167],"role":[170],"translation.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
