{"id":"https://openalex.org/W7125709035","doi":"https://doi.org/10.48550/arxiv.2601.16314","title":"Machine-Assisted Grading of Nationwide School-Leaving Essay Exams with LLMs and Statistical NLP","display_name":"Machine-Assisted Grading of Nationwide School-Leaving Essay Exams with LLMs and Statistical NLP","publication_year":2026,"publication_date":"2026-01-22","ids":{"openalex":"https://openalex.org/W7125709035","doi":"https://doi.org/10.48550/arxiv.2601.16314"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.16314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.16314","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123846263","display_name":"Andres Karjus","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karjus, Andres","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123835923","display_name":"Kais Allkivi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Allkivi, Kais","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115514670","display_name":"Silvia Maine","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maine, Silvia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095743742","display_name":"Katarin Leppik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leppik, Katarin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123863093","display_name":"Krister Kruusmaa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kruusmaa, Krister","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123819440","display_name":"Merilin Aruvee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aruvee, Merilin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5123846263"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.2110999971628189,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.2110999971628189,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.07289999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10959","display_name":"Student Assessment and Feedback","score":0.07010000199079514,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.8134999871253967},{"id":"https://openalex.org/keywords/grading","display_name":"Grading (engineering)","score":0.7387999892234802},{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.6783000230789185},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4700999855995178},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.4456999897956848},{"id":"https://openalex.org/keywords/argumentation-theory","display_name":"Argumentation theory","score":0.34310001134872437}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.8134999871253967},{"id":"https://openalex.org/C2777286243","wikidata":"https://www.wikidata.org/wiki/Q5591926","display_name":"Grading (engineering)","level":2,"score":0.7387999892234802},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.6783000230789185},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5264999866485596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4986000061035156},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4700999855995178},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4632999897003174},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.4456999897956848},{"id":"https://openalex.org/C509550671","wikidata":"https://www.wikidata.org/wiki/Q126945","display_name":"Medical education","level":1,"score":0.36489999294281006},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34769999980926514},{"id":"https://openalex.org/C65059942","wikidata":"https://www.wikidata.org/wiki/Q270105","display_name":"Argumentation theory","level":2,"score":0.34310001134872437},{"id":"https://openalex.org/C2779529714","wikidata":"https://www.wikidata.org/wiki/Q2632744","display_name":"Graduation (instrument)","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C2777898490","wikidata":"https://www.wikidata.org/wiki/Q17157236","display_name":"Writing assessment","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C120912362","wikidata":"https://www.wikidata.org/wiki/Q136822","display_name":"Higher education","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2718000113964081}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.16314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.16314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7838408350944519,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,90],"models":[2],"(LLMs)":[3],"enable":[4],"rapid":[5],"and":[6,18,84,87,114,130,187,213,218],"consistent":[7],"automated":[8,61,103],"evaluation":[9],"of":[10,16,36,60,67,71,111],"open-ended":[11],"exam":[12,69,188],"responses,":[13],"including":[14],"dimensions":[15],"content":[17],"argumentation":[19],"that":[20,102,110,138,176,194],"have":[21],"traditionally":[22],"required":[23],"human":[24,96,112,120,211],"judgment.":[25],"This":[26],"is":[27,145,160],"particularly":[28,151],"important":[29],"in":[30,42,52,205],"cases":[31],"where":[32],"a":[33,43,139,164,201,206],"large":[34,65],"amount":[35],"exams":[37,51],"need":[38],"to":[39,109,116,162,180],"be":[40,178,198],"graded":[41],"limited":[44],"time":[45],"frame,":[46],"such":[47],"as":[48,132],"nation-wide":[49],"graduation":[50],"various":[53],"countries.":[54],"Here,":[55],"we":[56],"examine":[57],"the":[58,80,119,170],"applicability":[59],"scoring":[62,104,121,143],"on":[63],"two":[64,72],"datasets":[66],"trial":[68],"essays":[70],"full":[73],"national":[74,202],"cohorts":[75],"from":[76],"Estonia.":[77],"We":[78,123],"operationalize":[79],"official":[81],"curriculum-based":[82],"rubric":[83],"compare":[85],"LLM":[86],"statistical":[88],"natural":[89],"processing":[91],"(NLP)":[92],"based":[93],"assessments":[94],"with":[95,215],"panel":[97],"scores.":[98],"The":[99,190],"results":[100],"show":[101],"can":[105,177,197],"achieve":[106],"performance":[107],"comparable":[108],"raters":[113],"tends":[115],"fall":[117],"within":[118],"range.":[122],"also":[124],"evaluate":[125],"bias,":[126],"prompt":[127],"injection":[128],"risks,":[129],"LLMs":[131],"essay":[133],"writers.":[134],"These":[135],"findings":[136],"demonstrate":[137],"principled,":[140],"rubric-driven,":[141],"human-in-the-loop":[142],"pipeline":[144],"viable":[146],"for":[147,153,185],"high-stakes":[148],"writing":[149],"assessment,":[150],"relevant":[152],"digitally":[154],"advanced":[155],"societies":[156],"like":[157],"Estonia,":[158],"which":[159],"about":[161],"adapt":[163],"fully":[165],"electronic":[166],"examination":[167],"system.":[168],"Furthermore,":[169],"system":[171],"produces":[172],"fine-grained":[173],"subscore":[174],"profiles":[175],"used":[179],"generate":[181],"systematic,":[182],"personalized":[183],"feedback":[184],"instruction":[186],"preparation.":[189],"study":[191],"provides":[192],"evidence":[193],"LLM-assisted":[195],"assessment":[196],"implemented":[199],"at":[200],"scale,":[203],"even":[204],"small-language":[207],"context,":[208],"while":[209],"maintaining":[210],"oversight":[212],"compliance":[214],"emerging":[216],"educational":[217],"regulatory":[219],"standards.":[220]},"counts_by_year":[],"updated_date":"2026-01-27T23:31:10.601533","created_date":"2026-01-27T00:00:00"}
