{"id":"https://openalex.org/W7133200558","doi":"https://doi.org/10.48550/arxiv.2602.24119","title":"Terminology Rarity Predicts Catastrophic Failure in LLM Translation of Low-Resource Ancient Languages: Evidence from Ancient Greek","display_name":"Terminology Rarity Predicts Catastrophic Failure in LLM Translation of Low-Resource Ancient Languages: Evidence from Ancient Greek","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7133200558","doi":"https://doi.org/10.48550/arxiv.2602.24119"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.24119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127788702","display_name":"James L. Zainaldin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zainaldin, James L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090275437","display_name":"Cameron Pattison","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pattison, Cameron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127777338","display_name":"Manuela Marai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marai, Manuela","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127829310","display_name":"Jacob Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jacob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5117864953","display_name":"Mark Schiefsky","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schiefsky, Mark J.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5127788702"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6123999953269958,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6123999953269958,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.18860000371932983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06650000065565109,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/terminology","display_name":"Terminology","score":0.7700999975204468},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5996999740600586},{"id":"https://openalex.org/keywords/scholarship","display_name":"Scholarship","score":0.41339999437332153},{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.4065999984741211},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.37470000982284546},{"id":"https://openalex.org/keywords/documentary-evidence","display_name":"Documentary evidence","score":0.3702000081539154},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.3659999966621399}],"concepts":[{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.7700999975204468},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5996999740600586},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.42250001430511475},{"id":"https://openalex.org/C2778061430","wikidata":"https://www.wikidata.org/wiki/Q188823","display_name":"Scholarship","level":2,"score":0.41339999437332153},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.4065999984741211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37540000677108765},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C95480931","wikidata":"https://www.wikidata.org/wiki/Q5287668","display_name":"Documentary evidence","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3666999936103821},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.36340001225471497},{"id":"https://openalex.org/C204983608","wikidata":"https://www.wikidata.org/wiki/Q2111958","display_name":"Productivity","level":2,"score":0.35600000619888306},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3449000120162964},{"id":"https://openalex.org/C205531365","wikidata":"https://www.wikidata.org/wiki/Q35497","display_name":"Ancient Greek","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C56608182","wikidata":"https://www.wikidata.org/wiki/Q541923","display_name":"Ancient literature","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C2994462464","wikidata":"https://www.wikidata.org/wiki/Q9129","display_name":"Greek language","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C204320433","wikidata":"https://www.wikidata.org/wiki/Q7268772","display_name":"Quality by Design","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.24119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.24119","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.24119","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.24119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.4101044535636902,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"study":[1],"presents":[2],"the":[3,41,60,116,136,166,176,197,210,229,238],"first":[4],"systematic,":[5],"reference-free":[6],"human":[7,94,206],"evaluation":[8,83,95,242],"of":[9,33,45,62,112,165,187,231,240],"large":[10],"language":[11],"model":[12],"(LLM)":[13],"machine":[14],"translation":[15,77,124,188],"(MT)":[16],"for":[17,193,228,237,244],"Ancient":[18,179],"Greek":[19,42,180],"(AG)":[20],"technical":[21],"prose.":[22],"We":[23,75,225],"evaluate":[24],"translations":[25,108],"by":[26,40,109,150],"three":[27],"commercial":[28],"LLMs":[29,121,232],"(Claude,":[30],"Gemini,":[31],"ChatGPT)":[32],"twenty":[34],"paragraph-length":[35],"passages":[36,152],"from":[37],"two":[38,54,151],"works":[39],"physician":[43],"Galen":[44],"Pergamum":[46],"(ca.":[47],"129-216":[48],"CE):":[49],"On":[50,59,115,135],"Mixtures,":[51],"which":[52,67],"has":[53,68],"published":[55],"English":[56],"translations,":[57],"and":[58,92,236],"Composition":[61],"Drugs":[63],"according":[64],"to":[65,105,161],"Kinds,":[66],"never":[69],"been":[70],"fully":[71],"translated":[72,118,167],"into":[73],"English.":[74],"assess":[76],"quality":[78,125,141,195,215],"using":[79],"both":[80],"standard":[81],"automated":[82,241],"metrics":[84,201],"(BLEU,":[85],"chrF++,":[86],"METEOR,":[87],"ROUGE-L,":[88],"BERTScore,":[89],"COMET,":[90],"BLEURT)":[91],"expert":[93,133],"via":[96,172],"a":[97,110,184,213],"modified":[98],"Multidimensional":[99],"Quality":[100],"Metrics":[101],"(MQM)":[102],"framework":[103],"applied":[104],"all":[106],"60":[107],"team":[111],"domain":[113],"specialists.":[114],"previously":[117],"expository":[119],"text,":[120,139],"achieved":[122],"high":[123,147],"(mean":[126],"MQM":[127],"score":[128],"95.2/100),":[129],"with":[130,146,205,212],"performance":[131],"approaching":[132],"level.":[134],"untranslated":[137,198],"pharmacological":[138],"aggregate":[140],"was":[142],"lower":[143],"(79.9/100)":[144],"but":[145,218],"variance":[148],"driven":[149],"presenting":[153],"extreme":[154],"terminological":[155],"density;":[156],"excluding":[157],"these,":[158],"scores":[159],"converged":[160],"within":[162],"4":[163],"points":[164],"text.":[168],"Terminology":[169],"rarity,":[170],"operationalized":[171],"corpus":[173],"frequency":[174],"in":[175,233],"literary":[177],"Diorisis":[178],"Corpus,":[181],"emerged":[182],"as":[183],"strong":[185],"predictor":[186],"failure":[189],"(r":[190],"=":[191],"-.97":[192],"passage-level":[194],"on":[196,209],"text).":[199],"Automated":[200],"showed":[202],"moderate":[203],"correlation":[204],"judgment":[207],"overall":[208],"text":[211],"wide":[214],"spread":[216],"(Composition),":[217],"no":[219],"metric":[220],"discriminated":[221],"among":[222],"high-quality":[223],"translations.":[224],"discuss":[226],"implications":[227],"use":[230],"Classical":[234],"scholarship":[235],"design":[239],"pipelines":[243],"low-resource":[245],"ancient":[246],"languages.":[247]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-03T00:00:00"}
