{"id":"https://openalex.org/W4415721977","doi":"https://doi.org/10.3390/info16110943","title":"A Systematic Evaluation of Large Language Models and Retrieval-Augmented Generation for the Task of Kazakh Question Answering","display_name":"A Systematic Evaluation of Large Language Models and Retrieval-Augmented Generation for the Task of Kazakh Question Answering","publication_year":2025,"publication_date":"2025-10-30","ids":{"openalex":"https://openalex.org/W4415721977","doi":"https://doi.org/10.3390/info16110943"},"language":"en","primary_location":{"id":"doi:10.3390/info16110943","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16110943","pdf_url":"https://www.mdpi.com/2078-2489/16/11/943/pdf?version=1761829626","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/16/11/943/pdf?version=1761829626","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092706663","display_name":"Aigerim Mansurova","orcid":"https://orcid.org/0009-0003-1978-9574"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Aigerim Mansurova","raw_affiliation_strings":["Big Data and Blockchain Technologies Research and Innovation Center, Astana IT University, Astana 020000, Kazakhstan"],"raw_orcid":"https://orcid.org/0009-0003-1978-9574","affiliations":[{"raw_affiliation_string":"Big Data and Blockchain Technologies Research and Innovation Center, Astana IT University, Astana 020000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082431551","display_name":"Arailym Tleubayeva","orcid":"https://orcid.org/0000-0001-9560-9756"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Arailym Tleubayeva","raw_affiliation_strings":["School of Artificial Intelligence and Data Science, Astana IT University, Astana 020000, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0001-9560-9756","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Data Science, Astana IT University, Astana 020000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088617921","display_name":"Aliya Nugumanova","orcid":"https://orcid.org/0000-0001-5522-4421"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Aliya Nugumanova","raw_affiliation_strings":["Big Data and Blockchain Technologies Research and Innovation Center, Astana IT University, Astana 020000, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0001-5522-4421","affiliations":[{"raw_affiliation_string":"Big Data and Blockchain Technologies Research and Innovation Center, Astana IT University, Astana 020000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067593236","display_name":"Adai Shomanov","orcid":"https://orcid.org/0000-0001-8253-7474"},"institutions":[{"id":"https://openalex.org/I60559429","display_name":"Nazarbayev University","ror":"https://ror.org/052bx8q98","country_code":"KZ","type":"education","lineage":["https://openalex.org/I60559429"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Adai Shomanov","raw_affiliation_strings":["Computer Science Department, Nazarbayev University, Astana 020000, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Nazarbayev University, Astana 020000, Kazakhstan","institution_ids":["https://openalex.org/I60559429"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058858156","display_name":"\u015eadi Evren \u015eeker","orcid":"https://orcid.org/0000-0002-7323-3695"},"institutions":[{"id":"https://openalex.org/I67581229","display_name":"Istanbul University","ror":"https://ror.org/03a5qrr21","country_code":"TR","type":"education","lineage":["https://openalex.org/I67581229"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Sadi Evren Seker","raw_affiliation_strings":["Department of Computer Engineering, Faculty of Computer and Information Technologies, Istanbul University, 34320 Istanbul, Turkey"],"raw_orcid":"https://orcid.org/0000-0002-7323-3695","affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Faculty of Computer and Information Technologies, Istanbul University, 34320 Istanbul, Turkey","institution_ids":["https://openalex.org/I67581229"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082431551","https://openalex.org/A5092706663"],"corresponding_institution_ids":["https://openalex.org/I4210141757"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":8.5032,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97442941,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"16","issue":"11","first_page":"943","last_page":"943"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.896399974822998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.896399974822998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.01769999973475933,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7911999821662903},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6769999861717224},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5705000162124634},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5541999936103821},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.47200000286102295},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45509999990463257},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4117000102996826}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7911999821662903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.723800003528595},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6769999861717224},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5705000162124634},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5640000104904175},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5541999936103821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5135999917984009},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.47200000286102295},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45509999990463257},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.384799987077713},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3824999928474426},{"id":"https://openalex.org/C2781297163","wikidata":"https://www.wikidata.org/wiki/Q9252","display_name":"Kazakh","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.3352000117301941},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29649999737739563},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27390000224113464},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26429998874664307}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/info16110943","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16110943","pdf_url":"https://www.mdpi.com/2078-2489/16/11/943/pdf?version=1761829626","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7e7b04cafe4c4d64b05dec20beb7c1c8","is_oa":true,"landing_page_url":"https://doaj.org/article/7e7b04cafe4c4d64b05dec20beb7c1c8","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 16, Iss 11, p 943 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info16110943","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16110943","pdf_url":"https://www.mdpi.com/2078-2489/16/11/943/pdf?version=1761829626","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415721977.pdf","grobid_xml":"https://content.openalex.org/works/W4415721977.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W2183366530","https://openalex.org/W3176793246","https://openalex.org/W4294833327","https://openalex.org/W4309674289","https://openalex.org/W4376874793","https://openalex.org/W4384154918","https://openalex.org/W4386024619","https://openalex.org/W4386210993","https://openalex.org/W4392487838","https://openalex.org/W4396653761","https://openalex.org/W4400665563","https://openalex.org/W4400975273","https://openalex.org/W4401610852","https://openalex.org/W4402308184","https://openalex.org/W4402371950","https://openalex.org/W4402670856","https://openalex.org/W4402671066","https://openalex.org/W4402683452","https://openalex.org/W4403791697","https://openalex.org/W4404280355","https://openalex.org/W4405081690","https://openalex.org/W4406071956","https://openalex.org/W4406472599","https://openalex.org/W4406477144","https://openalex.org/W4407298138","https://openalex.org/W4409637885","https://openalex.org/W4410030662","https://openalex.org/W4410515544","https://openalex.org/W4410600121","https://openalex.org/W4411026510","https://openalex.org/W4411345221","https://openalex.org/W4411420530","https://openalex.org/W4411531956","https://openalex.org/W4411565477","https://openalex.org/W4411606774","https://openalex.org/W4411630045","https://openalex.org/W4412708385","https://openalex.org/W4412728962","https://openalex.org/W4413180016","https://openalex.org/W4413415991","https://openalex.org/W4413886305","https://openalex.org/W4415124086","https://openalex.org/W4415230966","https://openalex.org/W7084137563"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,48,80,181],"systematic":[4],"evaluation":[5,50],"of":[6,29,106,112,118,140,164],"large":[7],"language":[8],"models":[9,38,84,174],"(LLMs)":[10],"and":[11,35,44,66,69,166],"retrieval-augmented":[12],"generation":[13],"(RAG)":[14],"approaches":[15],"for":[16],"question":[17],"answering":[18],"(QA)":[19],"in":[20,86,170,180],"the":[21,27,92,96,121,162],"low-resource":[22,182],"Kazakh":[23,173],"language.":[24],"We":[25],"assess":[26],"performance":[28,179],"existing":[30],"proprietary":[31,83],"(GPT-4o,":[32],"Gemini":[33],"2.5-flash)":[34],"open-source":[36,172],"Kazakh-oriented":[37],"(KazLLM-8B,":[39],"Sherkala-8B,":[40],"Irbis-7B)":[41],"across":[42],"closed-book":[43,87,104],"RAG":[45,75,90,98,123,144],"settings.":[46],"Within":[47],"three-stage":[49],"framework":[51],"we":[52],"benchmark":[53],"retriever":[54,129],"quality,":[55],"examine":[56],"LLM":[57],"abilities":[58],"such":[59],"as":[60],"knowledge-gap":[61],"detection,":[62],"external":[63],"truth":[64],"integration":[65],"context":[67,167],"grounding,":[68],"measures":[70],"gains":[71],"from":[72,102],"realistic":[73],"end-to-end":[74,122],"pipelines.":[76],"Our":[77],"results":[78],"show":[79,146],"clear":[81],"pattern:":[82],"lead":[85],"QA,":[88],"but":[89],"narrows":[91],"gap":[93],"substantially.":[94],"Under":[95],"Ideal":[97],"setting,":[99],"KazLLM-8B":[100,125],"improves":[101],"its":[103],"baseline":[105],"0.427":[107],"to":[108,134,175],"reach":[109],"answer":[110,131],"correctness":[111,132],"0.867,":[113],"closely":[114],"matching":[115],"GPT-4o\u2019s":[116,137],"score":[117,139],"0.869.":[119],"In":[120],"setup,":[124],"paired":[126],"with":[127],"Snowflake":[128],"achieved":[130],"up":[133],"0.754,":[135],"surpassing":[136],"best":[138],"0.632.":[141],"Despite":[142],"improvements,":[143],"outcomes":[145],"an":[147],"inconsistency:":[148],"high":[149,155],"retrieval":[150],"metrics":[151],"do":[152],"not":[153],"guarantee":[154],"QA":[156,178],"system":[157],"accuracy.":[158],"The":[159],"findings":[160],"highlight":[161],"importance":[163],"retrievers":[165],"grounding":[168],"strategies":[169],"enabling":[171],"deliver":[176],"competitive":[177],"setting.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":5}],"updated_date":"2026-06-18T10:00:31.954636","created_date":"2025-10-31T00:00:00"}
