{"id":"https://openalex.org/W2903182367","doi":"https://doi.org/10.18653/v1/w18-6480","title":"Measuring sentence parallelism using Mahalanobis distances: The NRC unsupervised submissions to the WMT18 Parallel Corpus Filtering shared task","display_name":"Measuring sentence parallelism using Mahalanobis distances: The NRC unsupervised submissions to the WMT18 Parallel Corpus Filtering shared task","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2903182367","doi":"https://doi.org/10.18653/v1/w18-6480","mag":"2903182367"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w18-6480","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6480","pdf_url":"https://www.aclweb.org/anthology/W18-6480.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Shared Task Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W18-6480.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045031910","display_name":"Patrick Littell","orcid":"https://orcid.org/0000-0002-7173-0225"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Patrick Littell","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084782140","display_name":"Samuel Larkin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Samuel Larkin","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103359343","display_name":"Darlene Stewart","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Darlene Stewart","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111588711","display_name":"Michel Simard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Michel Simard","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065257553","display_name":"Cyril Goutte","orcid":"https://orcid.org/0000-0003-4939-6555"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Cyril Goutte","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049492975","display_name":"Chi-kiu Lo","orcid":"https://orcid.org/0000-0001-8714-7846"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Chi-kiu Lo","raw_affiliation_strings":["National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada 1200 Montreal Road, Ottawa ON, K1A 0R6","institution_ids":["https://openalex.org/I4210159778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5045031910"],"corresponding_institution_ids":["https://openalex.org/I4210159778"],"apc_list":null,"apc_paid":null,"fwci":1.3513,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.8618879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"900","last_page":"907"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7958659529685974},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7082462310791016},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.681513786315918},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5806421041488647},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5708909630775452},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5535359382629395},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5525274276733398},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.410684734582901},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3741545081138611},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15674617886543274},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.11474615335464478}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7958659529685974},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7082462310791016},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.681513786315918},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5806421041488647},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5708909630775452},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5535359382629395},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5525274276733398},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.410684734582901},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3741545081138611},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15674617886543274},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.11474615335464478},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w18-6480","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6480","pdf_url":"https://www.aclweb.org/anthology/W18-6480.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Shared Task Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w18-6480","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6480","pdf_url":"https://www.aclweb.org/anthology/W18-6480.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Shared Task Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2903182367.pdf","grobid_xml":"https://content.openalex.org/works/W2903182367.grobid-xml"},"referenced_works_count":10,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W331889179","https://openalex.org/W635530177","https://openalex.org/W1540596182","https://openalex.org/W2047870694","https://openalex.org/W2134800885","https://openalex.org/W2167925143","https://openalex.org/W2605035112","https://openalex.org/W2902918014","https://openalex.org/W2903297715"],"related_works":["https://openalex.org/W2045183646","https://openalex.org/W2162409446","https://openalex.org/W2109463584","https://openalex.org/W2504075107","https://openalex.org/W2375873920","https://openalex.org/W2354058185","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W2392760275","https://openalex.org/W2083530853"],"abstract_inverted_index":{"The":[0],"WMT18":[1],"shared":[2],"task":[3],"on":[4],"parallel":[5,24,71,94],"corpus":[6,25,49,62,119],"filtering":[7],"(Koehn":[8,26],"et":[9,27],"al.,":[10,28],"2018b)":[11],"challenged":[12],"teams":[13],"to":[14,44],"score":[15],"sentence":[16],"pairs":[17],"from":[18],"a":[19,41,47],"large":[20],"highrecall,":[21],"low-precision":[22],"web-scraped":[23],"2018a).":[29],"Participants":[30],"could":[31],"use":[32],"existing":[33],"sample":[34,69],"corpora":[35],"(e.g.":[36],"past":[37],"WMT":[38],"data)":[39],"as":[40],"supervisory":[42],"signal":[43],"learn":[45],"what":[46],"\"clean\"":[48],"looks":[50],"like.":[51],"However,":[52],"in":[53,73,102,107,127],"lowerresource":[54],"situations":[55],"it":[56],"often":[57],"happens":[58],"that":[59,74,87],"the":[60,64,67,91,103,108,115,123,128],"target":[61],"of":[63,70],"language":[65],"is":[66],"only":[68],"text":[72],"language.":[75],"We":[76],"therefore":[77],"made":[78],"several":[79],"unsupervised":[80],"entries,":[81],"setting":[82],"ourselves":[83],"an":[84],"additional":[85,92],"constraint":[86],"we":[88],"not":[89],"utilize":[90],"clean":[93],"corpora.":[95],"One":[96],"such":[97],"entry":[98],"fairly":[99],"consistently":[100],"scored":[101],"top":[104],"ten":[105],"systems":[106,125],"100M-word":[109],"conditions,":[110],"and":[111],"for":[112],"one":[113],"task-translating":[114],"European":[116],"Medicines":[117],"Agency":[118],"(Tiedemann,":[120],"2009)-scored":[121],"among":[122],"best":[124],"even":[126],"10M-word":[129],"conditions.":[130]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2026-03-04T07:04:00.330322","created_date":"2025-10-10T00:00:00"}
