{"id":"https://openalex.org/W4417337073","doi":"https://doi.org/10.1109/tbdata.2025.3644594","title":"Benchmarking LLMs Against Human Translators: A Comprehensive Evaluation Across Languages, Domains, and Expertise Levels","display_name":"Benchmarking LLMs Against Human Translators: A Comprehensive Evaluation Across Languages, Domains, and Expertise Levels","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W4417337073","doi":"https://doi.org/10.1109/tbdata.2025.3644594"},"language":null,"primary_location":{"id":"doi:10.1109/tbdata.2025.3644594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbdata.2025.3644594","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101123756","display_name":"Jianhao Yan","orcid":"https://orcid.org/0000-0002-5670-1207"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhao Yan","raw_affiliation_strings":["Zhejiang University, Hangzhou, China","Zhejiang University, Zhejiang, P.R. China"],"raw_orcid":"https://orcid.org/0009-0005-2895-1822","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]},{"raw_affiliation_string":"Zhejiang University, Zhejiang, P.R. China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101394593","display_name":"Pingchuan Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pingchuan Yan","raw_affiliation_strings":["University College London, London, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100777438","display_name":"Yulong Chen","orcid":"https://orcid.org/0000-0002-4960-5241"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yulong Chen","raw_affiliation_strings":["Department of Computer Science and Technology, University of Cambridge, Cambridge, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, University of Cambridge, Cambridge, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337132","display_name":"Jing Li","orcid":"https://orcid.org/0000-0003-4602-3227"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing Li","raw_affiliation_strings":["LanBridge Group, Sichuan, China","LanBridge Group, Sichuan, P.R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LanBridge Group, Sichuan, China","institution_ids":[]},{"raw_affiliation_string":"LanBridge Group, Sichuan, P.R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030505771","display_name":"Xianchao Zhu","orcid":"https://orcid.org/0000-0002-5401-1552"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xianchao Zhu","raw_affiliation_strings":["LanBridge Group, Sichuan, China","LanBridge Group, Sichuan, P.R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LanBridge Group, Sichuan, China","institution_ids":[]},{"raw_affiliation_string":"LanBridge Group, Sichuan, P.R. China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100333758","display_name":"Yue Zhang","orcid":"https://orcid.org/0000-0002-8213-1420"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Zhang","raw_affiliation_strings":["School of Engineering, Westlake University, Hangzhou, China","School of Engineering, Westlake University, Zhejiang, P.R. China"],"raw_orcid":"https://orcid.org/0000-0002-5214-2268","affiliations":[{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]},{"raw_affiliation_string":"School of Engineering, Westlake University, Zhejiang, P.R. China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101123756"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":9.8338,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97874996,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"12","issue":"3","first_page":"801","last_page":"813"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7351999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7351999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09070000052452087,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.046300001442432404,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lagging","display_name":"Lagging","score":0.7035999894142151},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5964999794960022},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5418000221252441},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.531000018119812},{"id":"https://openalex.org/keywords/literal-translation","display_name":"Literal translation","score":0.5292999744415283},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.42480000853538513},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.40720000863075256},{"id":"https://openalex.org/keywords/language-industry","display_name":"Language industry","score":0.3506999909877777}],"concepts":[{"id":"https://openalex.org/C2776962539","wikidata":"https://www.wikidata.org/wiki/Q6472078","display_name":"Lagging","level":2,"score":0.7035999894142151},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6100999712944031},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5964999794960022},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5418000221252441},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.531000018119812},{"id":"https://openalex.org/C2777761643","wikidata":"https://www.wikidata.org/wiki/Q1191837","display_name":"Literal translation","level":3,"score":0.5292999744415283},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.42480000853538513},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.40720000863075256},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37290000915527344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3515999913215637},{"id":"https://openalex.org/C1813318","wikidata":"https://www.wikidata.org/wiki/Q2465887","display_name":"Language industry","level":4,"score":0.3506999909877777},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.31709998846054077},{"id":"https://openalex.org/C2993724205","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Human language","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C122783720","wikidata":"https://www.wikidata.org/wiki/Q183065","display_name":"Interpreter","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2838999927043915},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C83867959","wikidata":"https://www.wikidata.org/wiki/Q371467","display_name":"Scopus","level":3,"score":0.2815000116825104},{"id":"https://openalex.org/C2780882242","wikidata":"https://www.wikidata.org/wiki/Q14235582","display_name":"Literal (mathematical logic)","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.27230000495910645},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C2778883600","wikidata":"https://www.wikidata.org/wiki/Q2390977","display_name":"Language proficiency","level":2,"score":0.2653000056743622}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tbdata.2025.3644594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbdata.2025.3644594","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,146],"study":[1,147],"presents":[2,148],"a":[3,149],"comprehensive":[4],"evaluation":[5,30],"of":[6,10,23,81,170],"the":[7,32,165],"translation":[8,110,125,172],"capabilities":[9,167],"existing":[11],"LLMs,":[12],"such":[13],"as":[14],"GPT-4,":[15],"ALMA-R,":[16],"and":[17,54,61,66,133,143,154,168],"Deepseek-R1,":[18],"compared":[19],"to":[20,76],"human":[21,29,138,155],"translators":[22,78,139,156],"varying":[24],"expertise":[25],"levels.":[26],"Through":[27,117],"systematic":[28,150],"using":[31],"MQM":[33],"schema,":[34],"we":[35,120],"assess":[36],"translations":[37,132],"across":[38,112,157],"three":[39,62],"language":[40,103,115],"pairs":[41],"(Chinese":[42],"<inline-formula":[43,49,56],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[44,50,57],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[45,51,58],"notation=\"LaTeX\">$\\longleftrightarrow$</tex-math></inline-formula>":[46,52,59],"English,":[47,53],"Russian":[48],"Chinese":[55],"Hindi)":[60],"domains":[63],"(News,":[64],"Technology,":[65],"Biomedical).":[67],"Our":[68],"findings":[69],"reveal":[70],"that":[71],"LLMs":[72,105,153],"achieve":[73],"performance":[74,99],"comparable":[75],"junior-level":[77],"in":[79,101,124],"terms":[80],"total":[82],"errors,":[83],"while":[84,137],"still":[85],"lagging":[86],"behind":[87],"senior":[88],"translators.":[89],"Unlike":[90],"traditional":[91],"Neural":[92],"Machine":[93],"Translation":[94],"systems,":[95],"which":[96],"show":[97],"significant":[98],"degradation":[100],"resource-poor":[102],"directions,":[104],"like":[106],"GPT-4":[107,127],"maintain":[108],"consistent":[109],"quality":[111],"all":[113],"evaluated":[114],"pairs.":[116],"qualitative":[118],"analysis,":[119],"identify":[121],"distinctive":[122],"patterns":[123],"approaches:":[126],"tends":[128],"toward":[129],"overly":[130],"literal":[131],"exhibits":[134],"lexical":[135],"inconsistency,":[136],"sometimes":[140],"over-interpret":[141],"context":[142],"introduce":[144],"hallucinations.":[145],"comparison":[151],"between":[152],"different":[158],"proficiency":[159],"levels,":[160],"providing":[161],"valuable":[162],"insights":[163],"into":[164],"current":[166],"limitations":[169],"LLM-based":[171],"systems.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-15T06:05:50.897203","created_date":"2025-12-15T00:00:00"}
