{"id":"https://openalex.org/W4387118761","doi":"https://doi.org/10.3390/bdcc7040160","title":"Defining Semantically Close Words of Kazakh Language with Distributed System Apache Spark","display_name":"Defining Semantically Close Words of Kazakh Language with Distributed System Apache Spark","publication_year":2023,"publication_date":"2023-09-27","ids":{"openalex":"https://openalex.org/W4387118761","doi":"https://doi.org/10.3390/bdcc7040160"},"language":"en","primary_location":{"id":"doi:10.3390/bdcc7040160","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040160","pdf_url":"https://www.mdpi.com/2504-2289/7/4/160/pdf?version=1695819208","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-2289/7/4/160/pdf?version=1695819208","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005644516","display_name":"Dauren Ayazbayev","orcid":"https://orcid.org/0000-0001-9973-2145"},"institutions":[{"id":"https://openalex.org/I4210120897","display_name":"Suleyman Demirel University","ror":"https://ror.org/02s8x5a25","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210120897"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Dauren Ayazbayev","raw_affiliation_strings":["Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan","institution_ids":["https://openalex.org/I4210120897"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073070441","display_name":"Andrey Bogdanchikov","orcid":"https://orcid.org/0000-0001-9693-7487"},"institutions":[{"id":"https://openalex.org/I4210120897","display_name":"Suleyman Demirel University","ror":"https://ror.org/02s8x5a25","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210120897"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Andrey Bogdanchikov","raw_affiliation_strings":["Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan","institution_ids":["https://openalex.org/I4210120897"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084084991","display_name":"Kamila Orynbekova","orcid":"https://orcid.org/0000-0002-2182-2914"},"institutions":[{"id":"https://openalex.org/I4210120897","display_name":"Suleyman Demirel University","ror":"https://ror.org/02s8x5a25","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210120897"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Kamila Orynbekova","raw_affiliation_strings":["Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Suleyman Demirel University, Kaskelen 040900, Kazakhstan","institution_ids":["https://openalex.org/I4210120897"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056484904","display_name":"Iraklis Varlamis","orcid":"https://orcid.org/0000-0002-0876-8167"},"institutions":[{"id":"https://openalex.org/I32762134","display_name":"Harokopio University of Athens","ror":"https://ror.org/02k5gp281","country_code":"GR","type":"education","lineage":["https://openalex.org/I32762134"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Iraklis Varlamis","raw_affiliation_strings":["Department of Informatics and Telematics, Harokopio University of Athens, 17779 Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Department of Informatics and Telematics, Harokopio University of Athens, 17779 Athens, Greece","institution_ids":["https://openalex.org/I32762134"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005644516","https://openalex.org/A5073070441"],"corresponding_institution_ids":["https://openalex.org/I4210120897"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.8711,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78990914,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"7","issue":"4","first_page":"160","last_page":"160"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8519601821899414},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6695538759231567},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6257219910621643},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.6048418879508972},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6023423671722412},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49801111221313477},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.47542837262153625},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.42041903734207153},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4188050925731659},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0831538736820221}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8519601821899414},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6695538759231567},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6257219910621643},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.6048418879508972},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6023423671722412},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49801111221313477},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.47542837262153625},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42041903734207153},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4188050925731659},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0831538736820221},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/bdcc7040160","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040160","pdf_url":"https://www.mdpi.com/2504-2289/7/4/160/pdf?version=1695819208","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:2418faeb2c244d348d9324c35ca6ab0b","is_oa":true,"landing_page_url":"https://doaj.org/article/2418faeb2c244d348d9324c35ca6ab0b","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data and Cognitive Computing, Vol 7, Iss 4, p 160 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2504-2289/7/4/160/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/bdcc7040160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/bdcc7040160","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040160","pdf_url":"https://www.mdpi.com/2504-2289/7/4/160/pdf?version=1695819208","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7799999713897705}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387118761.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W632432350","https://openalex.org/W1523296404","https://openalex.org/W1662133657","https://openalex.org/W2124509324","https://openalex.org/W2132234208","https://openalex.org/W2914262738","https://openalex.org/W2946755293","https://openalex.org/W2963425185","https://openalex.org/W2979066127","https://openalex.org/W3013386667","https://openalex.org/W3013794879","https://openalex.org/W3016822073","https://openalex.org/W3163479523","https://openalex.org/W3216581765","https://openalex.org/W4321062141","https://openalex.org/W4378449954","https://openalex.org/W4385240189","https://openalex.org/W4385380836","https://openalex.org/W6754723338","https://openalex.org/W6758460675"],"related_works":["https://openalex.org/W3078371441","https://openalex.org/W2974225181","https://openalex.org/W2116838603","https://openalex.org/W4288108740","https://openalex.org/W2559461694","https://openalex.org/W2766760871","https://openalex.org/W2151108588","https://openalex.org/W1997312918","https://openalex.org/W2047828095","https://openalex.org/W2365659184"],"abstract_inverted_index":{"This":[0],"work":[1],"focuses":[2],"on":[3,114,144],"determining":[4],"semantically":[5,135,177],"close":[6],"words":[7,27,81,172,179,240],"and":[8,45,56,89,165,173,207,222,225],"using":[9,98,167],"semantic":[10,24,93,210],"similarity":[11,25,94,101],"in":[12,14,19,48,74,82,85,130,158,220,243],"general":[13],"order":[15,104,131],"to":[16,38,78,105,127,132,149,198,212],"improve":[17],"performance":[18],"information":[20,36],"retrieval":[21,37],"tasks.":[22],"The":[23,71,216],"of":[26,95,109,117,121,170,186],"is":[28,77,147,218],"an":[29,234],"important":[30],"task":[31,60],"with":[32,50],"many":[33],"applications":[34],"from":[35,180],"spell":[39],"checking":[40],"or":[41,241],"even":[42],"document":[43],"clustering":[44],"classification.":[46],"Although,":[47],"languages":[49,65],"rich":[51],"linguistic":[52],"resources,":[53],"the":[54,80,92,96,107,110,115,122,134,151,175,200,214,223,229,244],"methods":[55,231],"tools":[57],"for":[58,183,237],"this":[59],"are":[61,163],"well":[62],"established,":[63],"some":[64],"do":[66],"not":[67],"have":[68,126],"such":[69],"tools.":[70],"first":[72],"step":[73],"our":[75],"experiment":[76],"represent":[79],"a":[83,86,99,168,181,189,204,208],"collection":[84],"vector":[87,100],"form":[88],"then":[90],"define":[91,133],"terms":[97],"method.":[102],"In":[103,188],"tame":[106],"complexity":[108],"task,":[111],"which":[112],"relies":[113],"number":[116],"word":[118,137],"(and,":[119],"consequently,":[120],"vector)":[123],"pairs":[124],"that":[125,142,228],"be":[128],"combined":[129],"closest":[136],"pairs,":[138],"A":[139],"distributed":[140],"method":[141],"runs":[143],"Apache":[145],"Spark":[146],"designed":[148],"reduce":[150],"calculation":[152],"time":[153],"by":[154],"running":[155],"comparison":[156],"tasks":[157],"parallel.":[159],"Three":[160],"alternative":[161],"implementations":[162],"proposed":[164,230],"tested":[166],"list":[169],"target":[171],"seeking":[174],"most":[176],"similar":[178,239],"lexicon":[182],"each":[184],"one":[185],"them.":[187],"second":[190],"step,":[191],"we":[192],"employ":[193],"pre-trained":[194],"multilingual":[195],"sentence":[196,205],"transformers":[197],"capture":[199],"content":[201],"semantics":[202],"at":[203],"level":[206],"vector-based":[209],"index":[211],"accelerate":[213],"searches.":[215],"code":[217],"written":[219],"MapReduce,":[221],"experiments":[224],"results":[226],"show":[227],"can":[232],"provide":[233],"interesting":[235],"solution":[236],"finding":[238],"texts":[242],"Kazakh":[245],"language.":[246]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4}],"updated_date":"2026-02-28T09:26:25.869077","created_date":"2025-10-10T00:00:00"}
