{"id":"https://openalex.org/W7139114783","doi":"https://doi.org/10.48550/arxiv.2603.16357","title":"Beyond Grading Accuracy: Exploring Alignment of TAs and LLMs","display_name":"Beyond Grading Accuracy: Exploring Alignment of TAs and LLMs","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7139114783","doi":"https://doi.org/10.48550/arxiv.2603.16357"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16357","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130030380","display_name":"Matthijs Jansen op de Haar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de Haar, Matthijs Jansen op","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049881419","display_name":"Nacir Bouali","orcid":"https://orcid.org/0000-0001-7465-9543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bouali, Nacir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089107604","display_name":"Faizan Ahmed","orcid":"https://orcid.org/0000-0002-2760-6892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed, Faizan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.35580000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.35580000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.10610000044107437,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10636","display_name":"Innovative Teaching and Learning Methods","score":0.062199998646974564,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grading","display_name":"Grading (engineering)","score":0.8766000270843506},{"id":"https://openalex.org/keywords/class-diagram","display_name":"Class diagram","score":0.59579998254776},{"id":"https://openalex.org/keywords/unified-modeling-language","display_name":"Unified Modeling Language","score":0.5873000025749207},{"id":"https://openalex.org/keywords/activity-diagram","display_name":"Activity diagram","score":0.35910001397132874},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.2750000059604645}],"concepts":[{"id":"https://openalex.org/C2777286243","wikidata":"https://www.wikidata.org/wiki/Q5591926","display_name":"Grading (engineering)","level":2,"score":0.8766000270843506},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.603600025177002},{"id":"https://openalex.org/C202446494","wikidata":"https://www.wikidata.org/wiki/Q664166","display_name":"Class diagram","level":4,"score":0.59579998254776},{"id":"https://openalex.org/C145644426","wikidata":"https://www.wikidata.org/wiki/Q169411","display_name":"Unified Modeling Language","level":3,"score":0.5873000025749207},{"id":"https://openalex.org/C64219723","wikidata":"https://www.wikidata.org/wiki/Q423262","display_name":"Activity diagram","level":4,"score":0.35910001397132874},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.34619998931884766},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33869999647140503},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26919999718666077},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2565000057220459},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.2556000053882599}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7208114266395569}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,21],"this":[1,108],"paper,":[2],"we":[3,31,75],"investigate":[4],"the":[5,101],"potential":[6],"of":[7,103,114,153,162,185],"open-source":[8,132,176,208],"Large":[9],"Language":[10,17],"Models":[11],"(LLMs)":[12],"for":[13,40],"grading":[14,66,78,195,216,222],"Unified":[15],"Modeling":[16],"(UML)":[18],"class":[19,84,117,214],"diagrams.":[20],"contrast":[22],"to":[23,155,164,183,231],"existing":[24,49],"work,":[25],"which":[26,81],"primarily":[27],"evaluates":[28],"proprietary":[29],"LLMs,":[30],"focus":[32],"on":[33],"non-proprietary":[34],"models,":[35],"making":[36],"our":[37],"approach":[38,230],"suitable":[39],"universities":[41],"where":[42,142,197],"transparency":[43],"and":[44,94,157],"cost":[45],"are":[46,86,97,199],"critical.":[47],"Additionally,":[48],"studies":[50],"assess":[51],"performance":[52,181],"over":[53,170],"complete":[54],"diagrams":[55,85,118],"rather":[56],"than":[57],"individual":[58,104,138],"criteria,":[59,139],"offering":[60],"limited":[61],"insight":[62],"into":[63],"how":[64],"automated":[65],"aligns":[67],"with":[68,221,235],"human":[69,146],"evaluation.":[70],"To":[71],"address":[72],"these":[73],"gaps,":[74],"propose":[76],"a":[77,111,120,158,167,186,189,193,228],"pipeline":[79,109,226],"in":[80,201],"student-generated":[82],"UML":[83,116,213],"independently":[87],"evaluated":[88],"by":[89,130,217],"both":[90],"teaching":[91],"assistants":[92],"(TAs)":[93],"LLMs.":[95,133],"Grades":[96],"then":[98],"compared":[99],"at":[100],"level":[102],"criteria.":[105,223],"We":[106],"evaluate":[107],"through":[110],"quantitative":[112],"study":[113],"92":[115],"from":[119,145],"software":[121],"design":[122],"course,":[123],"comparing":[124],"TA":[125],"grades":[126],"against":[127],"assessments":[128],"produced":[129],"six":[131],"Performance":[134],"is":[135],"measured":[136],"across":[137],"highlighting":[140],"areas":[141],"LLMs":[143,209],"diverge":[144],"graders.":[147],"Our":[148,204],"results":[149],"show":[150],"per-criterion":[151],"accuracy":[152],"up":[154,163],"88.56\\%":[156],"Pearson":[159],"correlation":[160],"coefficient":[161],"0.78,":[165],"representing":[166],"substantial":[168],"improvement":[169],"previous":[171],"work":[172],"while":[173],"using":[174],"only":[175],"models.":[177],"The":[178,224],"models":[179],"achieve":[180],"close":[182],"that":[184,207],"TA,":[187],"suggesting":[188],"possible":[190],"path":[191],"toward":[192],"mixed-initiative":[194],"system,":[196],"TAs":[198],"aided":[200],"their":[202],"grading.":[203],"findings":[205],"demonstrate":[206],"can":[210],"effectively":[211],"support":[212],"diagram":[215],"explicitly":[218],"identifying":[219],"alignment":[220],"proposed":[225],"provides":[227],"practical":[229],"managing":[232],"increasing":[233],"workloads":[234],"growing":[236],"student":[237],"counts.":[238]},"counts_by_year":[],"updated_date":"2026-06-20T20:08:15.867695","created_date":"2026-03-20T00:00:00"}
