{"id":"https://openalex.org/W2970682420","doi":"https://doi.org/10.18653/v1/w19-5343","title":"Neural Machine Translation for English\u2013Kazakh with Morphological Segmentation and Synthetic Data","display_name":"Neural Machine Translation for English\u2013Kazakh with Morphological Segmentation and Synthetic Data","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970682420","doi":"https://doi.org/10.18653/v1/w19-5343","mag":"2970682420"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-5343","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5343","pdf_url":"https://www.aclweb.org/anthology/W19-5343.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-5343.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062029765","display_name":"Antonio Toral","orcid":"https://orcid.org/0000-0003-2357-2960"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Antonio Toral","raw_affiliation_strings":["Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055868817","display_name":"Lukas Edman","orcid":"https://orcid.org/0000-0001-8215-3614"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Lukas Edman","raw_affiliation_strings":["Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008785601","display_name":"Galiya Yeshmagambetova","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Galiya Yeshmagambetova","raw_affiliation_strings":["Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004856135","display_name":"Jennifer Spenader","orcid":"https://orcid.org/0000-0003-2837-1500"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jennifer Spenader","raw_affiliation_strings":["Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Language and Cognition, Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"\u2020Center for Language and Cognition, \u2021Institute for Artificial Intelligence University of Groningen The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062029765"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":0.8671,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.81131902,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"386","last_page":"392"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9685999751091003,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kazakh","display_name":"Kazakh","score":0.9861189723014832},{"id":"https://openalex.org/keywords/agglutinative-language","display_name":"Agglutinative language","score":0.9126454591751099},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8031324744224548},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7777601480484009},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6880878210067749},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6841408014297485},{"id":"https://openalex.org/keywords/turkish","display_name":"Turkish","score":0.5112860202789307},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4683992564678192},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4581195116043091},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44818803668022156},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.445056289434433},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2606082856655121},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.09813651442527771}],"concepts":[{"id":"https://openalex.org/C2781297163","wikidata":"https://www.wikidata.org/wiki/Q9252","display_name":"Kazakh","level":2,"score":0.9861189723014832},{"id":"https://openalex.org/C80875076","wikidata":"https://www.wikidata.org/wiki/Q171263","display_name":"Agglutinative language","level":3,"score":0.9126454591751099},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8031324744224548},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7777601480484009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6880878210067749},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6841408014297485},{"id":"https://openalex.org/C2781121862","wikidata":"https://www.wikidata.org/wiki/Q256","display_name":"Turkish","level":2,"score":0.5112860202789307},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4683992564678192},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4581195116043091},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44818803668022156},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.445056289434433},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2606082856655121},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.09813651442527771},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/w19-5343","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5343","pdf_url":"https://www.aclweb.org/anthology/W19-5343.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:publications/66a779dd-8773-4d94-837f-5e3e3d5dbce1","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/66a779dd-8773-4d94-837f-5e3e3d5dbce1","pdf_url":"https://pure.rug.nl/ws/files/96085578/W19_5343.pdf","source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Toral Ruiz, A, Edman, L, Spenader, J & Yeshmagambetova, G 2019, Neural Machine Translation for English\u2013Kazakh with Morphological Segmentation and Synthetic Data. in Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1). vol. 2, Association for Computational Linguistics (ACL), Forence, Italy, pp. 386-392.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.rug.nl:openaire/66a779dd-8773-4d94-837f-5e3e3d5dbce1","is_oa":true,"landing_page_url":"https://hdl.handle.net/11370/66a779dd-8773-4d94-837f-5e3e3d5dbce1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Toral Ruiz, A, Edman, L, Spenader, J & Yeshmagambetova, G 2019, Neural Machine Translation for English\u2013Kazakh with Morphological Segmentation and Synthetic Data. in Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1). vol. 2, Association for Computational Linguistics (ACL), Forence, Italy, pp. 386-392.","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-5343","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5343","pdf_url":"https://www.aclweb.org/anthology/W19-5343.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970682420.pdf","grobid_xml":"https://content.openalex.org/works/W2970682420.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W309335912","https://openalex.org/W2101105183","https://openalex.org/W2133564696","https://openalex.org/W2250342921","https://openalex.org/W2250761258","https://openalex.org/W2251610689","https://openalex.org/W2508907594","https://openalex.org/W2550821151","https://openalex.org/W2563351168","https://openalex.org/W2595715041","https://openalex.org/W2887920589","https://openalex.org/W2902918014","https://openalex.org/W2916548775","https://openalex.org/W2962735107","https://openalex.org/W2962784628","https://openalex.org/W2963216553","https://openalex.org/W2963403868","https://openalex.org/W2963626623","https://openalex.org/W2964053711","https://openalex.org/W2964308564","https://openalex.org/W2964343359","https://openalex.org/W2973088264","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2970682420","https://openalex.org/W3203990896","https://openalex.org/W3013913162","https://openalex.org/W2086703572","https://openalex.org/W2098104139","https://openalex.org/W4290735204","https://openalex.org/W4290739335","https://openalex.org/W2810296466","https://openalex.org/W2950091200","https://openalex.org/W2963939565"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,7,12,20,40,55,66,70,87],"systems":[4],"submitted":[5],"by":[6],"University":[8],"of":[9,43,57,86],"Groningen":[10],"to":[11],"English-Kazakh":[13,58],"language":[14],"pair":[15],"(both":[16,35],"translation":[17,24],"directions)":[18],"for":[19,65,69,78,82],"WMT":[21],"2019":[22],"news":[23],"task.":[25],"We":[26],"explore":[27],"potential":[28],"benefits":[29],"from":[30,47],"using":[31],"(i)":[32],"morphological":[33],"segmentation":[34],"unsupervised":[36],"and":[37,52,60,68,80],"rule-based),":[38],"given":[39,54],"agglutinative":[41],"nature":[42],"Kazakh,":[44],"(ii)":[45],"data":[46],"two":[48],"additional":[49],"languages":[50],"(Turkish":[51],"Russian),":[53],"scarcity":[56],"data,":[59,63],"(iii)":[61],"synthetic":[62],"both":[64],"source":[67],"target":[71],"language.":[72],"Our":[73],"best":[74],"submissions":[75],"ranked":[76],"second":[77],"KazakhEnglish":[79],"third":[81],"EnglishKazakh":[83],"in":[84],"terms":[85],"BLEU":[88],"automatic":[89],"evaluation":[90],"metric.":[91]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
