{"id":"https://openalex.org/W2970117938","doi":"https://doi.org/10.18653/v1/w19-5439","title":"Filtering of Noisy Parallel Corpora Based on Hypothesis Generation","display_name":"Filtering of Noisy Parallel Corpora Based on Hypothesis Generation","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970117938","doi":"https://doi.org/10.18653/v1/w19-5439","mag":"2970117938"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-5439","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5439","pdf_url":"https://www.aclweb.org/anthology/W19-5439.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-5439.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088737726","display_name":"Zuzanna Parcheta","orcid":"https://orcid.org/0000-0002-3490-6068"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zuzanna Parcheta","raw_affiliation_strings":["Sciling S.L.,Carrer del Riu 321, Pinedo, 46012, Spain"],"affiliations":[{"raw_affiliation_string":"Sciling S.L.,Carrer del Riu 321, Pinedo, 46012, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059340686","display_name":"Germ\u00e1n Sanchis-Trilles","orcid":"https://orcid.org/0000-0003-0019-6018"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Germ\u00e1n Sanchis-Trilles","raw_affiliation_strings":["Sciling S.L.,Carrer del Riu 321, Pinedo, 46012, Spain"],"affiliations":[{"raw_affiliation_string":"Sciling S.L.,Carrer del Riu 321, Pinedo, 46012, Spain","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020181962","display_name":"Francisco Casacuberta","orcid":"https://orcid.org/0000-0002-8497-5598"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Francisco Casacuberta","raw_affiliation_strings":["PRHLT Research Center, Camino de Vera s/n, 46022 Valencia, Spain"],"affiliations":[{"raw_affiliation_string":"PRHLT Research Center, Camino de Vera s/n, 46022 Valencia, Spain","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088737726"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4335,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72029638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"282","last_page":"288"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8851821422576904},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7853188514709473},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7240304946899414},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7018765211105347},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5432682037353516},{"id":"https://openalex.org/keywords/nepali","display_name":"Nepali","score":0.5425487160682678},{"id":"https://openalex.org/keywords/bleu","display_name":"BLEU","score":0.5402687788009644},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5205893516540527},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.5135478973388672},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5108959078788757},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48570021986961365},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42567238211631775},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.42237016558647156},{"id":"https://openalex.org/keywords/language-translation","display_name":"Language translation","score":0.4179341495037079}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8851821422576904},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7853188514709473},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7240304946899414},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7018765211105347},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5432682037353516},{"id":"https://openalex.org/C2780068402","wikidata":"https://www.wikidata.org/wiki/Q33823","display_name":"Nepali","level":2,"score":0.5425487160682678},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.5402687788009644},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5205893516540527},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.5135478973388672},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5108959078788757},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48570021986961365},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42567238211631775},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.42237016558647156},{"id":"https://openalex.org/C2986862884","wikidata":"https://www.wikidata.org/wiki/Q7553","display_name":"Language translation","level":3,"score":0.4179341495037079},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w19-5439","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5439","pdf_url":"https://www.aclweb.org/anthology/W19-5439.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},{"id":"pmh:oai:riunet.upv.es:10251/180620","is_oa":true,"landing_page_url":"http://hdl.handle.net/10251/180620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400639","display_name":"RiuNet (Universitat Polit\u00e8cnica de Val\u00e8ncia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I60053951","host_organization_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","host_organization_lineage":["https://openalex.org/I60053951"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-5439","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5439","pdf_url":"https://www.aclweb.org/anthology/W19-5439.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.800000011920929}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970117938.pdf","grobid_xml":"https://content.openalex.org/works/W2970117938.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1905522558","https://openalex.org/W2101105183","https://openalex.org/W2108325777","https://openalex.org/W2117278770","https://openalex.org/W2124807415","https://openalex.org/W2595715041","https://openalex.org/W2810533336","https://openalex.org/W2904977935","https://openalex.org/W2913659301","https://openalex.org/W2933138175","https://openalex.org/W2962784628","https://openalex.org/W2963532001","https://openalex.org/W2964121744","https://openalex.org/W4288601832"],"related_works":["https://openalex.org/W2099607809","https://openalex.org/W2395641992","https://openalex.org/W3021126373","https://openalex.org/W2807475932","https://openalex.org/W4280571180","https://openalex.org/W3066373881","https://openalex.org/W2903057408","https://openalex.org/W92588874","https://openalex.org/W2426188458","https://openalex.org/W2963991316"],"abstract_inverted_index":{"The":[0,195],"filtering":[1,15,34,197],"task":[2,173],"of":[3,42,188],"noisy":[4,31,105,137],"parallel":[5,32,59,73],"corpora":[6,33,74,106],"in":[7,50,186,191],"WMT2019":[8],"aims":[9],"to":[10,13,17,79,134],"challenge":[11],"participants":[12],"create":[14,62],"methods":[16],"be":[18],"useful":[19],"for":[20,89,111],"training":[21],"machine":[22,209],"translation":[23,44,48,66,98,145],"systems.":[24],"In":[25,128],"this":[26],"work,":[27],"we":[28,68,84,102,130,182],"introduce":[29],"a":[30,43,109],"system":[35,198],"based":[36,150],"on":[37,151,170],"generating":[38],"hypotheses":[39],"by":[40],"means":[41],"model.":[45],"We":[46,115,162],"train":[47],"models":[49,99],"both":[51,90],"language":[52,91,160],"pairs:":[53],"Nepali-English":[54],"and":[55,77,81,94,107,125,155,158,202],"Sinhala-English":[56],"using":[57,207],"provided":[58,72],"corpora.":[60],"To":[61],"the":[63,97,104,117,122,144,167,171,177,189,192],"best":[64],"possible":[65],"model,":[67,179],"first":[69],"join":[70],"all":[71,203],"(Nepali,":[75],"Sinhala":[76],"Hindi":[78],"English)":[80],"after":[82],"that,":[83],"applied":[85],"bilingual":[86],"cross-entropy":[87],"selection":[88],"pairs":[92],"(Nepali-English":[93],"Sinhala-English).":[95],"Once":[96],"are":[100,149,205],"trained,":[101],"translate":[103],"generate":[108],"hypothesis":[110],"each":[112],"sentence":[113,124,152],"pair.":[114],"compute":[116],"smoothed":[118],"BLEU":[119],"score":[120],"between":[121],"target":[123,156],"generated":[126],"hypothesis.":[127],"addition,":[129],"apply":[131],"several":[132],"rules":[133],"discard":[135],"very":[136],"or":[138],"inadequate":[139],"sentences":[140],"which":[141,175,181],"can":[142],"lower":[143],"score.":[146],"These":[147],"heuristics":[148],"length,":[153],"source":[154,159],"similarity":[157],"detection.":[161],"compare":[163],"our":[164],"results":[165],"with":[166],"baseline":[168],"published":[169],"shared":[172,193],"website,":[174],"uses":[176],"Zipporah":[178],"over":[180],"achieve":[183],"significant":[184],"improvements":[185],"one":[187],"conditions":[190],"task.":[194],"designed":[196],"is":[199],"domain":[200],"independent":[201],"experiments":[204],"conducted":[206],"neural":[208],"translation.":[210]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
