{"id":"https://openalex.org/W2955770773","doi":"https://doi.org/10.1162/coli_a_00356","title":"Taking MT Evaluation Metrics to Extremes: Beyond Correlation with Human Judgments","display_name":"Taking MT Evaluation Metrics to Extremes: Beyond Correlation with Human Judgments","publication_year":2019,"publication_date":"2019-06-25","ids":{"openalex":"https://openalex.org/W2955770773","doi":"https://doi.org/10.1162/coli_a_00356","mag":"2955770773"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00356","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00356","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/515/1847409/coli_a_00356.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/45/3/515/1847409/coli_a_00356.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021180431","display_name":"Marina Fomicheva","orcid":null},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Marina Fomicheva","raw_affiliation_strings":["University of Sheffield"],"affiliations":[{"raw_affiliation_string":"University of Sheffield","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053217291","display_name":"Lucia Specia","orcid":"https://orcid.org/0000-0002-5495-3128"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lucia Specia","raw_affiliation_strings":["Imperial College London, University of Sheffield"],"affiliations":[{"raw_affiliation_string":"Imperial College London, University of Sheffield","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5021180431","https://openalex.org/A5053217291"],"corresponding_institution_ids":["https://openalex.org/I91136226"],"apc_list":null,"apc_paid":null,"fwci":1.5893,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.87734439,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"45","issue":"3","first_page":"515","last_page":"558"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7841326594352722},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.736913800239563},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6351735591888428},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6189032196998596},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.5617794990539551},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5204280018806458},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.47555941343307495},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47249236702919006},{"id":"https://openalex.org/keywords/pearson-product-moment-correlation-coefficient","display_name":"Pearson product-moment correlation coefficient","score":0.45571306347846985},{"id":"https://openalex.org/keywords/correlation-coefficient","display_name":"Correlation coefficient","score":0.4258069097995758},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.42166996002197266},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36156153678894043},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.20455005764961243},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08890584111213684},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.06852927803993225}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7841326594352722},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.736913800239563},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6351735591888428},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6189032196998596},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.5617794990539551},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5204280018806458},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.47555941343307495},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47249236702919006},{"id":"https://openalex.org/C55078378","wikidata":"https://www.wikidata.org/wiki/Q1136628","display_name":"Pearson product-moment correlation coefficient","level":2,"score":0.45571306347846985},{"id":"https://openalex.org/C2780092901","wikidata":"https://www.wikidata.org/wiki/Q3433612","display_name":"Correlation coefficient","level":2,"score":0.4258069097995758},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.42166996002197266},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36156153678894043},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.20455005764961243},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08890584111213684},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06852927803993225},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1162/coli_a_00356","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00356","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/515/1847409/coli_a_00356.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:152594","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:ac4bc9aad33d4e7fb18cc6cf6e6474ed","is_oa":true,"landing_page_url":"https://doaj.org/article/ac4bc9aad33d4e7fb18cc6cf6e6474ed","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 45, Iss 3, Pp 515-558 (2019)","raw_type":"article"},{"id":"pmh:oai:spiral.imperial.ac.uk:10044/1/79480","is_oa":true,"landing_page_url":"http://hdl.handle.net/10044/1/79480","pdf_url":null,"source":{"id":"https://openalex.org/S4306401396","display_name":"Spiral (Imperial College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I47508984","host_organization_name":"Imperial College London","host_organization_lineage":["https://openalex.org/I47508984"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"558","raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00356","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00356","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/515/1847409/coli_a_00356.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2955770773.pdf","grobid_xml":"https://content.openalex.org/works/W2955770773.grobid-xml"},"referenced_works_count":91,"referenced_works":["https://openalex.org/W73274768","https://openalex.org/W95780973","https://openalex.org/W606759049","https://openalex.org/W1419370637","https://openalex.org/W1489525520","https://openalex.org/W1529616844","https://openalex.org/W1530821417","https://openalex.org/W1576726980","https://openalex.org/W1597941798","https://openalex.org/W1603508585","https://openalex.org/W1654441844","https://openalex.org/W1965888476","https://openalex.org/W1996456980","https://openalex.org/W1998897826","https://openalex.org/W2004945083","https://openalex.org/W2015333112","https://openalex.org/W2016274652","https://openalex.org/W2032494091","https://openalex.org/W2033298685","https://openalex.org/W2078861931","https://openalex.org/W2080081864","https://openalex.org/W2088781183","https://openalex.org/W2089450762","https://openalex.org/W2098507980","https://openalex.org/W2100050071","https://openalex.org/W2101105183","https://openalex.org/W2101793943","https://openalex.org/W2108325777","https://openalex.org/W2117278770","https://openalex.org/W2123301721","https://openalex.org/W2124479570","https://openalex.org/W2125712079","https://openalex.org/W2127331160","https://openalex.org/W2130942839","https://openalex.org/W2132863434","https://openalex.org/W2132873493","https://openalex.org/W2133459682","https://openalex.org/W2133564696","https://openalex.org/W2138679451","https://openalex.org/W2142112143","https://openalex.org/W2143331230","https://openalex.org/W2144746247","https://openalex.org/W2147192413","https://openalex.org/W2149327368","https://openalex.org/W2152311128","https://openalex.org/W2153677005","https://openalex.org/W2159107349","https://openalex.org/W2160001241","https://openalex.org/W2161261736","https://openalex.org/W2165714389","https://openalex.org/W2165894879","https://openalex.org/W2186839874","https://openalex.org/W2250208866","https://openalex.org/W2250342921","https://openalex.org/W2250597803","https://openalex.org/W2250875036","https://openalex.org/W2251117546","https://openalex.org/W2251610689","https://openalex.org/W2252166243","https://openalex.org/W2260677151","https://openalex.org/W2276787968","https://openalex.org/W2294699749","https://openalex.org/W2508117065","https://openalex.org/W2508316494","https://openalex.org/W2511254834","https://openalex.org/W2511950321","https://openalex.org/W2512924740","https://openalex.org/W2517773427","https://openalex.org/W2521118911","https://openalex.org/W2527845440","https://openalex.org/W2552556052","https://openalex.org/W2559655401","https://openalex.org/W2573119710","https://openalex.org/W2576638397","https://openalex.org/W2600463316","https://openalex.org/W2613253298","https://openalex.org/W2624521690","https://openalex.org/W2757980860","https://openalex.org/W2760656271","https://openalex.org/W2900013662","https://openalex.org/W2915756181","https://openalex.org/W2916548775","https://openalex.org/W2962801832","https://openalex.org/W2963463964","https://openalex.org/W3121179723","https://openalex.org/W3123068586","https://openalex.org/W3143261911","https://openalex.org/W3203035525","https://openalex.org/W4205896512","https://openalex.org/W4206021986","https://openalex.org/W4247545505"],"related_works":["https://openalex.org/W357196361","https://openalex.org/W3109425891","https://openalex.org/W1036938216","https://openalex.org/W2113714434","https://openalex.org/W2377792686","https://openalex.org/W2027314909","https://openalex.org/W4200439127","https://openalex.org/W829658220","https://openalex.org/W3096637473","https://openalex.org/W2946560178"],"abstract_inverted_index":{"Automatic":[0],"Machine":[1],"Translation":[2],"(MT)":[3],"evaluation":[4,51,98,204],"is":[5,172,209],"an":[6,127],"active":[7],"field":[8],"of":[9,14,28,36,50,72,90,93,97,110,118,145],"research,":[10],"with":[11,31,39,58,115,153],"a":[12,55,86,94,122],"handful":[13],"new":[15],"metrics":[16,21,52,99,112,182,208],"devised":[17],"every":[18],"year.":[19],"Evaluation":[20],"are":[22,157,183,217],"generally":[23],"benchmarked":[24],"against":[25],"manual":[26],"assessment":[27],"translation":[29,119],"quality,":[30,120],"performance":[32,92,109,139],"measured":[33],"in":[34,78,202],"terms":[35],"overall":[37],"correlation":[38,57,133],"human":[40,59],"scores.":[41],"Much":[42],"work":[43,83],"has":[44,64],"been":[45,65],"dedicated":[46],"to":[47,53,129,160],"the":[48,68,91,108,111,130,191,200,203,213],"improvement":[49],"achieve":[54],"higher":[56],"judgments.":[60],"However,":[61],"little":[62],"insight":[63],"provided":[66],"regarding":[67],"weaknesses":[69],"and":[70,75,156],"strengths":[71],"existing":[73],"approaches":[74],"their":[76],"behavior":[77],"different":[79,116,143,207,220],"settings.":[80],"In":[81],"this":[82],"we":[84,106,166,179,197],"conduct":[85],"broad":[87],"meta-evaluation":[88],"study":[89],"wide":[95],"range":[96],"focusing":[100],"on":[101,219],"three":[102],"major":[103],"aspects.":[104],"First,":[105],"analyze":[107],"when":[113,151,186],"faced":[114,152],"levels":[117,144],"proposing":[121],"local":[123],"dependency":[124],"measure":[125],"as":[126],"alternative":[128],"standard,":[131],"global":[132],"coefficient.":[134],"We":[135],"show":[136,167,180,198],"that":[137,168,181,199],"metric":[138],"varies":[140],"significantly":[141],"across":[142],"MT":[146,189,194],"quality:":[147],"Metrics":[148],"perform":[149],"poorly":[150],"low-quality":[154,170],"translations":[155,171],"not":[158],"able":[159],"capture":[161],"nuanced":[162],"quality":[163],"distinctions.":[164],"Interestingly,":[165],"evaluating":[169,187],"also":[173],"more":[174,184],"challenging":[175],"for":[176,206],"humans.":[177],"Second,":[178],"reliable":[185],"neural":[188],"than":[190],"traditional":[192],"statistical":[193],"systems.":[195],"Finally,":[196],"difference":[201],"accuracy":[205],"maintained":[210],"even":[211],"if":[212],"gold":[214],"standard":[215],"scores":[216],"based":[218],"criteria.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
