{"id":"https://openalex.org/W4411604665","doi":"https://doi.org/10.3390/data10070094","title":"Mixtec\u2013Spanish Parallel Text Dataset for Language Technology Development","display_name":"Mixtec\u2013Spanish Parallel Text Dataset for Language Technology Development","publication_year":2025,"publication_date":"2025-06-21","ids":{"openalex":"https://openalex.org/W4411604665","doi":"https://doi.org/10.3390/data10070094"},"language":"en","primary_location":{"id":"doi:10.3390/data10070094","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10070094","pdf_url":"https://www.mdpi.com/2306-5729/10/7/94/pdf?version=1750497458","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/10/7/94/pdf?version=1750497458","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063702951","display_name":"Hermilo Santiago-Benito","orcid":"https://orcid.org/0000-0001-5650-1240"},"institutions":[{"id":"https://openalex.org/I157492648","display_name":"Autonomous University of Queretaro","ror":"https://ror.org/00v8fdc16","country_code":"MX","type":"education","lineage":["https://openalex.org/I157492648"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Hermilo Santiago-Benito","raw_affiliation_strings":["Facultad de Inform\u00e1tica, Universidad Aut\u00f3noma de Quer\u00e9taro, Av. de las Ciencias S/N, Campus Juriquilla, Quer\u00e9taro 76230, Mexico"],"raw_orcid":"https://orcid.org/0000-0001-5650-1240","affiliations":[{"raw_affiliation_string":"Facultad de Inform\u00e1tica, Universidad Aut\u00f3noma de Quer\u00e9taro, Av. de las Ciencias S/N, Campus Juriquilla, Quer\u00e9taro 76230, Mexico","institution_ids":["https://openalex.org/I157492648"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068400798","display_name":"Diana\u2010Margarita C\u00f3rdova\u2010Esparza","orcid":"https://orcid.org/0000-0002-5657-7752"},"institutions":[{"id":"https://openalex.org/I157492648","display_name":"Autonomous University of Queretaro","ror":"https://ror.org/00v8fdc16","country_code":"MX","type":"education","lineage":["https://openalex.org/I157492648"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Diana-Margarita C\u00f3rdova-Esparza","raw_affiliation_strings":["Facultad de Inform\u00e1tica, Universidad Aut\u00f3noma de Quer\u00e9taro, Av. de las Ciencias S/N, Campus Juriquilla, Quer\u00e9taro 76230, Mexico"],"raw_orcid":"https://orcid.org/0000-0002-5657-7752","affiliations":[{"raw_affiliation_string":"Facultad de Inform\u00e1tica, Universidad Aut\u00f3noma de Quer\u00e9taro, Av. de las Ciencias S/N, Campus Juriquilla, Quer\u00e9taro 76230, Mexico","institution_ids":["https://openalex.org/I157492648"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072558771","display_name":"Juan Terven","orcid":"https://orcid.org/0000-0001-6662-0390"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Juan Terven","raw_affiliation_strings":["Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico"],"raw_orcid":"https://orcid.org/0000-0001-6662-0390","affiliations":[{"raw_affiliation_string":"Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051606070","display_name":"No\u00e9 Alejandro Castro-S\u00e1nchez","orcid":"https://orcid.org/0000-0002-8083-3891"},"institutions":[{"id":"https://openalex.org/I4210090124","display_name":"Tecnol\u00f3gico Nacional de M\u00e9xico","ror":"https://ror.org/00davry38","country_code":"MX","type":"government","lineage":["https://openalex.org/I1302736544","https://openalex.org/I4210090124","https://openalex.org/I4405258672"]},{"id":"https://openalex.org/I4210138655","display_name":"Centro Nacional de Investigaci\u00f3n y Desarrollo Tecnol\u00f3gico","ror":"https://ror.org/0314ecq26","country_code":"MX","type":"facility","lineage":["https://openalex.org/I4210138655"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"No\u00e9-Alejandro Castro-S\u00e1nchez","raw_affiliation_strings":["Centro Nacional de Investigaci\u00f3n y Desarrollo Tecnol\u00f3gico, Tecnol\u00f3gico Nacional de M\u00e9xico, Interior Internado Palmira S/N, Palmira, Cuernavaca 62493, Mexico"],"raw_orcid":"https://orcid.org/0000-0002-8083-3891","affiliations":[{"raw_affiliation_string":"Centro Nacional de Investigaci\u00f3n y Desarrollo Tecnol\u00f3gico, Tecnol\u00f3gico Nacional de M\u00e9xico, Interior Internado Palmira S/N, Palmira, Cuernavaca 62493, Mexico","institution_ids":["https://openalex.org/I4210138655","https://openalex.org/I4210090124"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026078700","display_name":"Ma. Teresa Garc\u00eda Ram\u00edrez","orcid":"https://orcid.org/0000-0002-5524-2002"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Teresa Garc\u00eda-Ramirez","raw_affiliation_strings":["Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico"],"raw_orcid":"https://orcid.org/0000-0002-5524-2002","affiliations":[{"raw_affiliation_string":"Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078258282","display_name":"Julio-Alejandro Romero-Gonz\u00e1lez","orcid":"https://orcid.org/0000-0001-7257-7595"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Julio-Alejandro Romero-Gonz\u00e1lez","raw_affiliation_strings":["Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico"],"raw_orcid":"https://orcid.org/0000-0001-7257-7595","affiliations":[{"raw_affiliation_string":"Centro de Investigaci\u00f3n en Ciencia Aplicada y Tecnolog\u00eda Avanzada\u2014Unidad Quer\u00e9taro, Instituto Polit\u00e9cnico Nacional, Cerro Blanco No. 141, Col. Colinas del Cimatario, Quer\u00e9taro 76090, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054582941","display_name":"Jos\u00e9 M. \u00c1lvarez-Alvarado","orcid":"https://orcid.org/0000-0002-1304-6791"},"institutions":[{"id":"https://openalex.org/I157492648","display_name":"Autonomous University of Queretaro","ror":"https://ror.org/00v8fdc16","country_code":"MX","type":"education","lineage":["https://openalex.org/I157492648"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 M. \u00c1lvarez-Alvarado","raw_affiliation_strings":["Facultad de Ingenier\u00eda, Universidad Aut\u00f3noma de Quer\u00e9taro, Quer\u00e9taro 76010, Mexico"],"raw_orcid":"https://orcid.org/0000-0002-1304-6791","affiliations":[{"raw_affiliation_string":"Facultad de Ingenier\u00eda, Universidad Aut\u00f3noma de Quer\u00e9taro, Quer\u00e9taro 76010, Mexico","institution_ids":["https://openalex.org/I157492648"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5068400798"],"corresponding_institution_ids":["https://openalex.org/I157492648"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06882367,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"7","first_page":"94","last_page":"94"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.84170001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.84170001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6452751159667969},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5149365067481995},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3237295150756836},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.320479154586792}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6452751159667969},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5149365067481995},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3237295150756836},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.320479154586792}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/data10070094","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10070094","pdf_url":"https://www.mdpi.com/2306-5729/10/7/94/pdf?version=1750497458","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7de6149f19f34ada807e1b34ca62b4ec","is_oa":true,"landing_page_url":"https://doaj.org/article/7de6149f19f34ada807e1b34ca62b4ec","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 10, Iss 7, p 94 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data10070094","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10070094","pdf_url":"https://www.mdpi.com/2306-5729/10/7/94/pdf?version=1750497458","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411604665.pdf","grobid_xml":"https://content.openalex.org/works/W4411604665.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"This":[0],"article":[1],"introduces":[2],"a":[3,83,147,166,171,177],"freely":[4],"available":[5],"Spanish\u2013Mixtec":[6],"parallel":[7],"corpus":[8,215],"designed":[9],"to":[10,127,135,154,164,244],"foster":[11],"natural":[12],"language":[13,20,100],"processing":[14],"(NLP)":[15],"development":[16],"for":[17],"an":[18,90,104],"indigenous":[19],"that":[21,226],"remains":[22],"digitally":[23],"low-resourced.":[24],"The":[25],"dataset,":[26],"comprising":[27],"14,587":[28],"sentence":[29],"pairs,":[30],"covers":[31],"Mixtec":[32,99,253],"variants":[33],"from":[34,110],"Guerrero":[35],"(Tlacoachistlahuaca,":[36],"Northern":[37],"Guerrero,":[38],"and":[39,41,60,75,98,102,120,139,176,189,196,198,203,222,238,250],"Xochapa)":[40],"Oaxaca":[42],"(Western":[43],"Coast,":[44],"Southern":[45],"Lowland,":[46],"Santa":[47],"Mar\u00eda":[48],"Yosoy\u00faa,":[49],"Central,":[50,54],"Lower":[51],"Ca\u00f1ada,":[52],"Western":[53],"San":[55],"Antonio":[56],"Huitepec,":[57],"Upper":[58],"Western,":[59],"Southwestern":[61],"Central).":[62],"Texts":[63],"are":[64],"classified":[65],"into":[66,233],"four":[67],"main":[68],"domains":[69],"as":[70,87],"follows:":[71,88],"education,":[72],"law,":[73],"health,":[74],"religion.":[76],"To":[77],"compile":[78],"these":[79],"data,":[80],"we":[81],"conducted":[82,146],"two-phase":[84],"collection":[85],"process":[86],"first,":[89],"online":[91],"search":[92],"of":[93,107,113,117,150,174,180,194,201,248],"government":[94],"portals,":[95],"religious":[96],"organizations,":[97],"blogs;":[101],"second,":[103],"on-site":[105],"retrieval":[106],"physical":[108,129],"texts":[109],"the":[111,114,151,213,245,252],"library":[112],"Autonomous":[115],"University":[116],"Quer\u00e9taro.":[118],"Scanning":[119],"optical":[121],"character":[122,137],"recognition":[123],"were":[124],"then":[125],"performed":[126],"digitize":[128],"materials,":[130],"followed":[131],"by":[132],"manual":[133],"correction":[134],"fix":[136],"misreadings":[138],"remove":[140],"duplicates":[141],"or":[142],"irrelevant":[143],"segments.":[144],"We":[145,224],"preliminary":[148],"evaluation":[149],"collected":[152],"data":[153,209],"validate":[155],"its":[156],"usability":[157],"in":[158],"automatic":[159],"translation":[160],"systems.":[161],"From":[162],"Spanish":[163],"Mixtec,":[165],"fine-tuned":[167,184],"GPT-4o-mini":[168],"model":[169],"yielded":[170,191],"BLEU":[172,192],"score":[173,179],"0.22":[175],"TER":[178,199],"122.86,":[181],"while":[182,242],"two":[183],"open":[185],"source":[186],"models":[187],"mBART-50":[188],"M2M-100":[190],"scores":[193,200],"4.2":[195],"2.63":[197],"98.99":[202],"104.87,":[204],"respectively.":[205],"All":[206],"code":[207],"demonstrating":[208],"usage,":[210],"along":[211],"with":[212],"final":[214],"itself,":[216],"is":[217],"publicly":[218],"accessible":[219],"via":[220],"GitHub":[221],"Figshare.":[223],"anticipate":[225],"this":[227],"resource":[228],"will":[229],"enable":[230],"further":[231],"research":[232],"machine":[234],"translation,":[235],"speech":[236],"recognition,":[237],"other":[239],"NLP":[240],"applications":[241],"contributing":[243],"broader":[246],"goal":[247],"preserving":[249],"revitalizing":[251],"language.":[254]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
