{"id":"https://openalex.org/W4417529114","doi":"https://doi.org/10.1016/j.caeai.2025.100539","title":"EvalYaks: Instruction tuning datasets and LoRA fine-tuned models for automated scoring of CEFR B2 speaking assessment transcripts","display_name":"EvalYaks: Instruction tuning datasets and LoRA fine-tuned models for automated scoring of CEFR B2 speaking assessment transcripts","publication_year":2025,"publication_date":"2025-12-20","ids":{"openalex":"https://openalex.org/W4417529114","doi":"https://doi.org/10.1016/j.caeai.2025.100539"},"language":"en","primary_location":{"id":"doi:10.1016/j.caeai.2025.100539","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.caeai.2025.100539","pdf_url":null,"source":{"id":"https://openalex.org/S4210183364","display_name":"Computers and Education Artificial Intelligence","issn_l":"2666-920X","issn":["2666-920X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers and Education: Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1016/j.caeai.2025.100539","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099785482","display_name":"Nicy Scaria","orcid":"https://orcid.org/0009-0004-8699-0312"},"institutions":[{"id":"https://openalex.org/I1287097855","display_name":"Atkins (United States)","ror":"https://ror.org/05df6vn84","country_code":"US","type":"company","lineage":["https://openalex.org/I1287097855","https://openalex.org/I4210094877"]},{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN","US"],"is_corresponding":true,"raw_author_name":"Nicy Scaria","raw_affiliation_strings":["Computational and Data Sciences, Indian Institute of Science, Bangalore, India","Talking Yak English Learning Private Limited, Bangalore, India"],"raw_orcid":"https://orcid.org/0009-0004-8699-0312","affiliations":[{"raw_affiliation_string":"Computational and Data Sciences, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Talking Yak English Learning Private Limited, Bangalore, India","institution_ids":["https://openalex.org/I1287097855"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100016662","display_name":"Silvester John Joseph Kennedy","orcid":null},"institutions":[{"id":"https://openalex.org/I1287097855","display_name":"Atkins (United States)","ror":"https://ror.org/05df6vn84","country_code":"US","type":"company","lineage":["https://openalex.org/I1287097855","https://openalex.org/I4210094877"]},{"id":"https://openalex.org/I4210143412","display_name":"Talking Lights (United States)","ror":"https://ror.org/04awqpv94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143412"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Silvester John Joseph Kennedy","raw_affiliation_strings":["Talking Yak English Learning Private Limited, Bangalore, India","Talking Yak, Inc., WI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Talking Yak English Learning Private Limited, Bangalore, India","institution_ids":["https://openalex.org/I1287097855"]},{"raw_affiliation_string":"Talking Yak, Inc., WI, USA","institution_ids":["https://openalex.org/I4210143412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115558750","display_name":"Thomas Latinovich","orcid":null},"institutions":[{"id":"https://openalex.org/I1287097855","display_name":"Atkins (United States)","ror":"https://ror.org/05df6vn84","country_code":"US","type":"company","lineage":["https://openalex.org/I1287097855","https://openalex.org/I4210094877"]},{"id":"https://openalex.org/I4210143412","display_name":"Talking Lights (United States)","ror":"https://ror.org/04awqpv94","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143412"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas Latinovich","raw_affiliation_strings":["Talking Yak English Learning Private Limited, Bangalore, India","Talking Yak, Inc., WI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Talking Yak English Learning Private Limited, Bangalore, India","institution_ids":["https://openalex.org/I1287097855"]},{"raw_affiliation_string":"Talking Yak, Inc., WI, USA","institution_ids":["https://openalex.org/I4210143412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076749495","display_name":"Deepak Subramani","orcid":"https://orcid.org/0000-0002-5972-8878"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Deepak Subramani","raw_affiliation_strings":["Computational and Data Sciences, Indian Institute of Science, Bangalore, India"],"raw_orcid":"https://orcid.org/0000-0002-5972-8878","affiliations":[{"raw_affiliation_string":"Computational and Data Sciences, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5099785482"],"corresponding_institution_ids":["https://openalex.org/I1287097855","https://openalex.org/I59270414"],"apc_list":{"value":1800,"currency":"USD","value_usd":1800},"apc_paid":{"value":1800,"currency":"USD","value_usd":1800},"fwci":4.5354,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95160676,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":"10","issue":null,"first_page":"100539","last_page":"100539"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.391400009393692,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.391400009393692,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.22040000557899475,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08609999716281891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6097000241279602},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4812000095844269},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.46639999747276306},{"id":"https://openalex.org/keywords/lexical-diversity","display_name":"Lexical diversity","score":0.4334999918937683},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.3855000138282776},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.36320000886917114},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.3278999924659729}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.779699981212616},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6215000152587891},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6097000241279602},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.536899983882904},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4812000095844269},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.46639999747276306},{"id":"https://openalex.org/C2781202465","wikidata":"https://www.wikidata.org/wiki/Q18346297","display_name":"Lexical diversity","level":3,"score":0.4334999918937683},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.3855000138282776},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33169999718666077},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C2778883600","wikidata":"https://www.wikidata.org/wiki/Q2390977","display_name":"Language proficiency","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.caeai.2025.100539","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.caeai.2025.100539","pdf_url":null,"source":{"id":"https://openalex.org/S4210183364","display_name":"Computers and Education Artificial Intelligence","issn_l":"2666-920X","issn":["2666-920X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers and Education: Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.caeai.2025.100539","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.caeai.2025.100539","pdf_url":null,"source":{"id":"https://openalex.org/S4210183364","display_name":"Computers and Education Artificial Intelligence","issn_l":"2666-920X","issn":["2666-920X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers and Education: Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1984748091","https://openalex.org/W1991335324","https://openalex.org/W2138804265","https://openalex.org/W3100449273","https://openalex.org/W3205296127","https://openalex.org/W4309674289","https://openalex.org/W4310436400","https://openalex.org/W4318480794","https://openalex.org/W4320728802","https://openalex.org/W4323655724","https://openalex.org/W4399528455"],"related_works":[],"abstract_inverted_index":{"Relying":[0],"on":[1,229],"human":[2],"experts":[3],"to":[4,37,68,120,145,237,279],"evaluate":[5,55,257],"the":[6,39,56,77,115,125,162,166,174,187,234],"Common":[7],"European":[8],"Framework":[9],"of":[10,41,58,140,149,165,177,190,202,207,209],"Reference":[11],"for":[12,160,172,185,269],"Languages":[13],"(CEFR)":[14],"speaking":[15,45,80,169,263],"assessments":[16,31,46],"in":[17,47,76,82,156],"an":[18,198],"e-learning":[19,48],"environment":[20],"creates":[21],"scalability":[22],"challenges,":[23],"as":[24],"it":[25],"limits":[26],"how":[27],"quickly":[28],"and":[29,62,85,124,179,183,192,221,258,283,290],"widely":[30],"can":[32,255],"be":[33],"conducted.":[34],"We":[35],"aim":[36],"automate":[38],"evaluation":[40],"CEFR":[42,78,121,167,175,188,260,284],"B2":[43,79,122,168,261],"English":[44,116,262],"environments":[49],"from":[50,114],"conversation":[51],"transcripts.":[52],"First,":[53],"we":[54,89,134],"capability":[57],"leading":[59],"open":[60],"source":[61],"commercial":[63],"Large":[64],"Language":[65],"Models":[66],"(LLMs)":[67],"score":[69,259],"a":[70,91,147,205,226,244,266],"candidate\u2019s":[71],"performance":[72,213],"across":[73],"various":[74],"criteria":[75],"exam":[81],"both":[83],"global":[84],"India-specific":[86],"contexts.":[87,240],"Next,":[88],"create":[90],"new":[92,109,132],"expert-validated,":[93],"CEFR-aligned":[94,252],"synthetic":[95],"conversational":[96],"dataset":[97],"with":[98,215,250],"transcripts":[99,232],"that":[100,243],"are":[101,112,159],"rated":[102],"at":[103],"different":[104],"assessment":[105,239,253],"scores.":[106],"In":[107],"addition,":[108],"instruction-tuned":[110],"datasets":[111],"developed":[113],"Vocabulary":[117],"Profile":[118],"(up":[119],"level)":[123],"CEFR-SP":[126],"WikiAuto":[127],"datasets.":[128],"Finally,":[129],"using":[130],"these":[131],"datasets,":[133],"perform":[135],"parameter":[136,246],"efficient":[137],"instruction":[138,248],"tuning":[139],"Mistral":[141],"Instruct":[142],"7B":[143,245],"v0.2":[144],"develop":[146],"family":[148,158],"models":[150,155,218],"called":[151],"EvalYaks":[152,196],".":[153],"Four":[154],"this":[157],"assessing":[161],"four":[163],"sections":[164],"exam,":[170],"one":[171],"identifying":[173],"level":[176,189],"vocabulary":[178],"generating":[180,193],"level-specific":[181,194],"vocabulary,":[182],"another":[184],"detecting":[186],"text":[191],"text.":[195],"achieved":[197],"average":[199],"acceptable":[200],"accuracy":[201],"96":[203],"%,":[204],"degree":[206],"variation":[208],"0.35":[210],"levels,":[211],"achieving":[212],"competitive":[214],"state-of-the-art":[216],"frontier":[217],"like":[219],"GPT-4o":[220],"Gemini":[222],"Flash":[223],"2.5.":[224],"Furthermore,":[225],"pilot":[227],"validation":[228,291],"real-world":[230,238],"learner":[231],"verified":[233],"model\u2019s":[235],"transferability":[236],"This":[241],"demonstrates":[242],"LLM":[247],"tuned":[249],"high-quality":[251],"data":[254,288],"effectively":[256],"assessments,":[264],"offering":[265],"promising":[267],"solution":[268],"scalable,":[270],"automated":[271],"language":[272],"proficiency":[273],"evaluation.":[274],"The":[275],"methodology":[276],"is":[277],"adaptable":[278],"other":[280],"regional":[281],"contexts":[282],"levels":[285],"through":[286],"appropriate":[287],"generation":[289],"protocols.":[292]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-25T08:15:23.626066","created_date":"2025-12-21T00:00:00"}
