{"id":"https://openalex.org/W4414511025","doi":"https://doi.org/10.4114/intartif.vol28iss76pp283-300","title":"Generating a Culturally and Linguistically Adapted Word Similarity Benchmark for Yucatec Maya","display_name":"Generating a Culturally and Linguistically Adapted Word Similarity Benchmark for Yucatec Maya","publication_year":2025,"publication_date":"2025-09-25","ids":{"openalex":"https://openalex.org/W4414511025","doi":"https://doi.org/10.4114/intartif.vol28iss76pp283-300"},"language":"en","primary_location":{"id":"doi:10.4114/intartif.vol28iss76pp283-300","is_oa":true,"landing_page_url":"https://doi.org/10.4114/intartif.vol28iss76pp283-300","pdf_url":"http://journal.iberamia.org/index.php/intartif/article/download/2119/265","source":{"id":"https://openalex.org/S4210203986","display_name":"INTELIGENCIA ARTIFICIAL","issn_l":"1137-3601","issn":["1137-3601","1988-3064"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321681","host_organization_name":"Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial","host_organization_lineage":["https://openalex.org/P4310321681"],"host_organization_lineage_names":["Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Inteligencia Artificial","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"http://journal.iberamia.org/index.php/intartif/article/download/2119/265","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058452665","display_name":"Alejandro Molina-Villegas","orcid":"https://orcid.org/0000-0001-9398-8844"},"institutions":[{"id":"https://openalex.org/I4210096504","display_name":"Centro Nacional de Informaci\u00f3n Geogr\u00e1fica","ror":"https://ror.org/00nn73a81","country_code":"ES","type":"facility","lineage":["https://openalex.org/I4210096504"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Alejandro Molina-Villegas","raw_affiliation_strings":["SECIHTI - Centro de Investigaci\u00f3n en Ciencias de Informaci\u00f3n Geoespacial, Yucatan, Mexico"],"affiliations":[{"raw_affiliation_string":"SECIHTI - Centro de Investigaci\u00f3n en Ciencias de Informaci\u00f3n Geoespacial, Yucatan, Mexico","institution_ids":["https://openalex.org/I4210096504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119730156","display_name":"Joel Suro-Villalobos","orcid":null},"institutions":[{"id":"https://openalex.org/I4210117190","display_name":"CIC nanoGUNE","ror":"https://ror.org/023ke8y90","country_code":"ES","type":"nonprofit","lineage":["https://openalex.org/I4210117190"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Joel Suro-Villalobos","raw_affiliation_strings":["ShogunOS, Mexico"],"affiliations":[{"raw_affiliation_string":"ShogunOS, Mexico","institution_ids":["https://openalex.org/I4210117190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064832555","display_name":"Jorge Reyes-Maga\u00f1a","orcid":"https://orcid.org/0000-0002-8296-1344"},"institutions":[{"id":"https://openalex.org/I23878562","display_name":"Autonomous University of Yucat\u00e1n","ror":"https://ror.org/032p1n739","country_code":"MX","type":"education","lineage":["https://openalex.org/I23878562"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Jorge Reyes-Maga\u00f1a","raw_affiliation_strings":["Universidad Aut\u00f3noma de Yucat\u00e1n, Mexico"],"affiliations":[{"raw_affiliation_string":"Universidad Aut\u00f3noma de Yucat\u00e1n, Mexico","institution_ids":["https://openalex.org/I23878562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057487296","display_name":"Silvia Fern\u00e1ndez-Sabido","orcid":"https://orcid.org/0009-0009-8509-175X"},"institutions":[{"id":"https://openalex.org/I4210096504","display_name":"Centro Nacional de Informaci\u00f3n Geogr\u00e1fica","ror":"https://ror.org/00nn73a81","country_code":"ES","type":"facility","lineage":["https://openalex.org/I4210096504"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Silvia Fernandez-Sabido","raw_affiliation_strings":["SECIHTI - Centro de Investigaci\u00f3n en Ciencias de Informaci\u00f3n Geoespacial, Yucatan, Mexico"],"affiliations":[{"raw_affiliation_string":"SECIHTI - Centro de Investigaci\u00f3n en Ciencias de Informaci\u00f3n Geoespacial, Yucatan, Mexico","institution_ids":["https://openalex.org/I4210096504"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058452665"],"corresponding_institution_ids":["https://openalex.org/I4210096504"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1359333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":"76","first_page":"283","last_page":"300"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.928600013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.928600013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9150999784469604,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6940000057220459},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6226999759674072},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.6093000173568726},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5016000270843506},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.45320001244544983},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4440999925136566},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.44369998574256897},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.4088999927043915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7990999817848206},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6940000057220459},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6927000284194946},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.682200014591217},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6226999759674072},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.6093000173568726},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5016000270843506},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.45320001244544983},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4440999925136566},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.44369998574256897},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.4088999927043915},{"id":"https://openalex.org/C2778828372","wikidata":"https://www.wikidata.org/wiki/Q5283209","display_name":"Distributional semantics","level":3,"score":0.40630000829696655},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.39629998803138733},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.38269999623298645},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.26510000228881836}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.4114/intartif.vol28iss76pp283-300","is_oa":true,"landing_page_url":"https://doi.org/10.4114/intartif.vol28iss76pp283-300","pdf_url":"http://journal.iberamia.org/index.php/intartif/article/download/2119/265","source":{"id":"https://openalex.org/S4210203986","display_name":"INTELIGENCIA ARTIFICIAL","issn_l":"1137-3601","issn":["1137-3601","1988-3064"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321681","host_organization_name":"Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial","host_organization_lineage":["https://openalex.org/P4310321681"],"host_organization_lineage_names":["Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Inteligencia Artificial","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c847987d4c5841ca94133f2ed03b64ca","is_oa":true,"landing_page_url":"https://doaj.org/article/c847987d4c5841ca94133f2ed03b64ca","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Inteligencia Artificial, Vol 28, Iss 76 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.4114/intartif.vol28iss76pp283-300","is_oa":true,"landing_page_url":"https://doi.org/10.4114/intartif.vol28iss76pp283-300","pdf_url":"http://journal.iberamia.org/index.php/intartif/article/download/2119/265","source":{"id":"https://openalex.org/S4210203986","display_name":"INTELIGENCIA ARTIFICIAL","issn_l":"1137-3601","issn":["1137-3601","1988-3064"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321681","host_organization_name":"Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial","host_organization_lineage":["https://openalex.org/P4310321681"],"host_organization_lineage_names":["Asociaci\u00f3n Espa\u00f1ola para la Inteligencia Artificial"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Inteligencia Artificial","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310049","display_name":"W.K. Kellogg Foundation","ror":"https://ror.org/03gtcr185"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414511025.pdf","grobid_xml":"https://content.openalex.org/works/W4414511025.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"the":[1,14,71,82,91,134,157],"field":[2],"of":[3,13,136],"AI,":[4],"word":[5,35,67,164],"embedding":[6,165],"models":[7,109],"have":[8],"proven":[9],"to":[10,49,159],"be":[11,160],"one":[12],"most":[15],"effective":[16],"methods":[17],"for":[18,37,64,74,124,162],"capturing":[19],"semantic":[20,75],"and":[21,53,96,99,110,121],"syntactic":[22],"relationships":[23],"between":[24,103],"words,":[25],"enabling":[26],"significant":[27],"advancements":[28],"in":[29,167],"natural":[30],"language":[31,89,108],"processing.":[32],"However,":[33],"producing":[34],"embeddings":[36,68,105,123],"low-resource":[38,169],"indigenous":[39],"languages\u2014such":[40],"as":[41],"Yucatec":[42,125],"Maya\u2014often":[43],"suffers":[44],"from":[45,85,106],"poor":[46],"reliability":[47],"due":[48],"limited":[50],"data":[51],"availability":[52],"unsuitable":[54],"evaluation":[55,137],"benchmarks.In":[56],"this":[57,117],"work,":[58],"we":[59],"propose":[60],"a":[61,86,140,152],"novel":[62],"methodology":[63],"constructing":[65],"reliable":[66,120],"by":[69],"adapting":[70],"Swadesh":[72,83],"List":[73,84],"similarity":[76,101],"evaluation.":[77],"Our":[78,113],"approach":[79,150],"involves":[80],"translating":[81],"high-resource":[87],"pivot":[88],"into":[90],"target":[92],"language,":[93],"applying":[94],"linguistic":[95],"cultural":[97],"filtering,":[98],"correlating":[100],"scores":[102],"pivot-language":[104],"large":[107],"target-language":[111],"embeddings.":[112],"results":[114],"demonstrate":[115],"that":[116,133],"method":[118],"produces":[119],"interpretable":[122],"Maya.":[126],"Furthermore,":[127],"our":[128],"analysis":[129],"provides":[130],"compelling":[131],"evidence":[132],"choice":[135],"benchmark":[138],"has":[139],"far":[141],"greater":[142],"impact":[143],"on":[144],"reported":[145],"performance":[146],"than":[147],"hyperparameter":[148],"optimization.This":[149],"establishes":[151],"robust":[153],"new":[154],"framework":[155],"with":[156],"potential":[158],"adapted":[161],"improving":[163],"generation":[166],"other":[168],"languages.":[170]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
