{"id":"https://openalex.org/W2995917377","doi":"https://doi.org/10.3390/e21121213","title":"Improving Neural Machine Translation by Filtering Synthetic Parallel Data","display_name":"Improving Neural Machine Translation by Filtering Synthetic Parallel Data","publication_year":2019,"publication_date":"2019-12-11","ids":{"openalex":"https://openalex.org/W2995917377","doi":"https://doi.org/10.3390/e21121213","mag":"2995917377"},"language":"en","primary_location":{"id":"doi:10.3390/e21121213","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e21121213","pdf_url":"https://www.mdpi.com/1099-4300/21/12/1213/pdf?version=1576046419","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1099-4300/21/12/1213/pdf?version=1576046419","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088562276","display_name":"Guanghao Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I148751991","display_name":"Sogang University","ror":"https://ror.org/056tn4839","country_code":"KR","type":"education","lineage":["https://openalex.org/I148751991"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Guanghao Xu","raw_affiliation_strings":["Department of Engineering, Computer Science, Sogang University, Seoul 04107, Korea"],"affiliations":[{"raw_affiliation_string":"Department of Engineering, Computer Science, Sogang University, Seoul 04107, Korea","institution_ids":["https://openalex.org/I148751991"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008710152","display_name":"Youngjoong Ko","orcid":"https://orcid.org/0000-0002-0241-9193"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Youngjoong Ko","raw_affiliation_strings":["Applied Data Science, Sungkyunkwan University, Suwon 16419, Korea"],"affiliations":[{"raw_affiliation_string":"Applied Data Science, Sungkyunkwan University, Suwon 16419, Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101975931","display_name":"Jungyun Seo","orcid":"https://orcid.org/0000-0003-3670-7334"},"institutions":[{"id":"https://openalex.org/I148751991","display_name":"Sogang University","ror":"https://ror.org/056tn4839","country_code":"KR","type":"education","lineage":["https://openalex.org/I148751991"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jungyun Seo","raw_affiliation_strings":["Department of Engineering, Computer Science, Sogang University, Seoul 04107, Korea"],"affiliations":[{"raw_affiliation_string":"Department of Engineering, Computer Science, Sogang University, Seoul 04107, Korea","institution_ids":["https://openalex.org/I148751991"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5008710152"],"corresponding_institution_ids":["https://openalex.org/I848706"],"apc_list":{"value":2000,"currency":"CHF","value_usd":2165},"apc_paid":{"value":2000,"currency":"CHF","value_usd":2165},"fwci":0.867,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.81734932,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"21","issue":"12","first_page":"1213","last_page":"1213"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.8660721778869629},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.809109091758728},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.6673828959465027},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6530244946479797},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6134941577911377},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6091500520706177},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.594673752784729},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5592928528785706},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5181090235710144},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4908173978328705},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47779732942581177},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.47333747148513794},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3997894525527954},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09513068199157715},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08204078674316406},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.06900990009307861}],"concepts":[{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.8660721778869629},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.809109091758728},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.6673828959465027},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6530244946479797},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6134941577911377},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6091500520706177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.594673752784729},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5592928528785706},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5181090235710144},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4908173978328705},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47779732942581177},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.47333747148513794},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3997894525527954},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09513068199157715},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08204078674316406},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.06900990009307861},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/e21121213","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e21121213","pdf_url":"https://www.mdpi.com/1099-4300/21/12/1213/pdf?version=1576046419","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},{"id":"pmh:oai:mdpi.com:/1099-4300/21/12/1213/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/e21121213","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Entropy","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:7514558","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7514558","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Entropy (Basel)","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/e21121213","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e21121213","pdf_url":"https://www.mdpi.com/1099-4300/21/12/1213/pdf?version=1576046419","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2992462654","display_name":null,"funder_award_id":"2017-0-00255","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2995917377.pdf","grobid_xml":"https://content.openalex.org/works/W2995917377.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1902237438","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2129734311","https://openalex.org/W2130942839","https://openalex.org/W2136156618","https://openalex.org/W2251765408","https://openalex.org/W2294774419","https://openalex.org/W2493916176","https://openalex.org/W2595715041","https://openalex.org/W2597891111","https://openalex.org/W2741602058","https://openalex.org/W2786253471","https://openalex.org/W2788353357","https://openalex.org/W2888808532","https://openalex.org/W2902918014","https://openalex.org/W2962784628","https://openalex.org/W2963212250","https://openalex.org/W2963216553","https://openalex.org/W2963281280","https://openalex.org/W2964266061","https://openalex.org/W3201817155","https://openalex.org/W6739189055","https://openalex.org/W6774062504"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W2375873920","https://openalex.org/W3151736118","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W4362495644","https://openalex.org/W2392760275","https://openalex.org/W2083530853","https://openalex.org/W2883671469","https://openalex.org/W2728761353"],"abstract_inverted_index":{"Synthetic":[0],"data":[1,19,26],"has":[2],"been":[3],"shown":[4],"to":[5,55,85,121],"be":[6],"effective":[7],"in":[8],"training":[9],"state-of-the-art":[10],"neural":[11],"machine":[12],"translation":[13,45,101],"(NMT)":[14],"systems.":[15],"Because":[16],"the":[17,28,32,67,97,113],"synthetic":[18,60,68,91],"is":[20],"often":[21],"generated":[22],"by":[23,119],"back-translating":[24],"monolingual":[25],"from":[27,59],"target":[29],"language":[30],"into":[31],"source":[33],"language,":[34],"it":[35],"potentially":[36],"contains":[37],"a":[38,52,72],"lot":[39],"of":[40,66],"noise\u2014weakly":[41],"paired":[42],"sentences":[43],"or":[44],"errors.":[46],"In":[47],"this":[48,57],"paper,":[49],"we":[50,70,88],"propose":[51],"novel":[53],"approach":[54],"filter":[56],"noise":[58],"data.":[61,93],"For":[62],"each":[63],"sentence":[64,82],"pair":[65],"data,":[69,109],"compute":[71],"semantic":[73],"similarity":[74],"score":[75],"using":[76,106],"bilingual":[77],"word":[78],"embeddings.":[79],"By":[80],"selecting":[81],"pairs":[83],"according":[84],"these":[86],"scores,":[87],"obtain":[89],"better":[90],"parallel":[92],"Experimental":[94],"results":[95],"on":[96],"IWSLT":[98],"2017":[99],"Korean\u2192English":[100],"task":[102],"show":[103],"that":[104],"despite":[105],"much":[107],"less":[108],"our":[110],"method":[111],"outperforms":[112],"baseline":[114],"NMT":[115],"system":[116],"with":[117],"back-translation":[118],"up":[120],"0.72":[122],"and":[123,129],"0.62":[124],"Bleu":[125],"points":[126],"for":[127],"tst2016":[128],"tst2017,":[130],"respectively.":[131]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
