{"id":"https://openalex.org/W2970963247","doi":"https://doi.org/10.18653/v1/w19-5441","title":"The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task","display_name":"The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970963247","doi":"https://doi.org/10.18653/v1/w19-5441","mag":"2970963247"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-5441","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5441","pdf_url":"https://www.aclweb.org/anthology/W19-5441.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-5441.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047023030","display_name":"Ra\u00fal V\u00e1zquez","orcid":null},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Ra\u00fal V\u00e1zquez","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060871343","display_name":"Umut Sulubacak","orcid":"https://orcid.org/0000-0002-4929-4850"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Umut Sulubacak","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082417280","display_name":"J\u00f6rg Tiedemann","orcid":"https://orcid.org/0000-0003-3065-7989"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"J\u00f6rg Tiedemann","raw_affiliation_strings":["University of Helsinki"],"affiliations":[{"raw_affiliation_string":"University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5047023030"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.5781,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.75979987,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"294","last_page":"300"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7957472205162048},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.756206750869751},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7162635326385498},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.7040375471115112},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6715191602706909},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6087408065795898},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5492066740989685},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.443654328584671},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.43787139654159546},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4130387306213379},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08522054553031921}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7957472205162048},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.756206750869751},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7162635326385498},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.7040375471115112},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6715191602706909},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6087408065795898},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5492066740989685},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.443654328584671},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.43787139654159546},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4130387306213379},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08522054553031921},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w19-5441","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5441","pdf_url":"https://www.aclweb.org/anthology/W19-5441.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},{"id":"pmh:oai:helda.helsinki.fi:10138/305139","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/305139","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference contribution"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-5441","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5441","pdf_url":"https://www.aclweb.org/anthology/W19-5441.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7599999904632568}],"awards":[{"id":"https://openalex.org/G232687775","display_name":null,"funder_award_id":"Horizon 2020 Research and Innovation Programme (gr","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4459410915","display_name":"Found in Translation \u2013 Natural Language Understanding with Cross-Lingual Grounding","funder_award_id":"771113","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5036817778","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innov","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5861945474","display_name":"Methods for Managing Audiovisual Data: Combining  Automatic Efficiency with Human Accuracy","funder_award_id":"780069","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7842005466","display_name":null,"funder_award_id":"Horizon 2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8633428685","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innovat","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970963247.pdf","grobid_xml":"https://content.openalex.org/works/W2970963247.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W120322644","https://openalex.org/W2156700117","https://openalex.org/W2168175751","https://openalex.org/W2169200297","https://openalex.org/W2538358357","https://openalex.org/W2595715041","https://openalex.org/W2902918014","https://openalex.org/W2962784628","https://openalex.org/W2963919854","https://openalex.org/W3149745985","https://openalex.org/W4379510236"],"related_works":["https://openalex.org/W2180954594","https://openalex.org/W2052835778","https://openalex.org/W2049003611","https://openalex.org/W2108418243","https://openalex.org/W2127804977","https://openalex.org/W164103134","https://openalex.org/W2787352659","https://openalex.org/W1970611213","https://openalex.org/W4206560911","https://openalex.org/W4372260270"],"abstract_inverted_index":{"This":[0,56],"paper":[1],"describes":[2],"the":[3,12,37],"University":[4],"of":[5,33],"Helsinki":[6],"Language":[7],"Technology":[8],"group's":[9],"participation":[10],"in":[11],"WMT":[13],"2019":[14],"parallel":[15],"corpus":[16],"filtering":[17],"task.":[18],"Our":[19],"scores":[20,44],"were":[21],"produced":[22,43],"using":[23],"a":[24,31,53,62],"two-step":[25],"strategy.":[26],"First,":[27],"we":[28,42],"individually":[29],"applied":[30],"series":[32],"filters":[34],"to":[35,60,71],"remove":[36],"'bad'":[38],"quality":[39],"sentences.":[40],"Then,":[41],"for":[45],"each":[46],"sentence":[47],"by":[48],"weighting":[49],"these":[50],"features":[51],"with":[52],"classification":[54],"model.":[55],"methodology":[57],"allowed":[58],"us":[59],"build":[61],"simple":[63],"and":[64],"reliable":[65],"system":[66],"that":[67],"is":[68],"easily":[69],"adaptable":[70],"other":[72],"language":[73],"pairs.":[74]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
