{"id":"https://openalex.org/W6910635328","doi":"https://doi.org/10.48550/arxiv.2508.04105","title":"Towards Transparent AI Grading: Semantic Entropy as a Signal for Human-AI Disagreement","display_name":"Towards Transparent AI Grading: Semantic Entropy as a Signal for Human-AI Disagreement","publication_year":2025,"publication_date":"2025-08-06","ids":{"openalex":"https://openalex.org/W6910635328","doi":"https://doi.org/10.48550/arxiv.2508.04105"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2508.04105","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.04105","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2508.04105","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Iyer, Karrtik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iyer, Karrtik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ravikiran, Manikandan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ravikiran, Manikandan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Pendse, Prasanna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pendse, Prasanna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Mohanty, Shayan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mohanty, Shayan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.2694000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.2694000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.18310000002384186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08730000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5873000025749207},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.5684999823570251},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.522599995136261},{"id":"https://openalex.org/keywords/grading","display_name":"Grading (engineering)","score":0.44179999828338623},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.44179999828338623}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6449999809265137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5942000150680542},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5873000025749207},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.5684999823570251},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4499000012874603},{"id":"https://openalex.org/C2777286243","wikidata":"https://www.wikidata.org/wiki/Q5591926","display_name":"Grading (engineering)","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42320001125335693},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.41940000653266907},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3463999927043915},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.26660001277923584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2508.04105","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.04105","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2508.04105","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.04105","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.477958619594574,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automated":[0],"grading":[1,16,145],"systems":[2],"can":[3],"efficiently":[4],"score":[5],"short-answer":[6],"responses,":[7],"yet":[8],"they":[9],"often":[10],"fail":[11],"to":[12,96],"indicate":[13],"when":[14],"a":[15,27,41],"decision":[17],"is":[18],"uncertain":[19],"or":[20],"potentially":[21],"contentious.":[22],"We":[23,71],"introduce":[24],"semantic":[25,78,111,131],"entropy,":[26],"measure":[28],"of":[29,63],"variability":[30],"across":[31,89,119],"multiple":[32],"GPT-4-generated":[33],"explanations":[34],"for":[35,43],"the":[36,61,106],"same":[37],"student":[38],"response,":[39],"as":[40,101,133],"proxy":[42],"human":[44,82],"grader":[45,83],"disagreement.":[46],"By":[47],"clustering":[48],"rationales":[49],"via":[50],"entailment-based":[51],"similarity":[52],"and":[53,121,142],"computing":[54],"entropy":[55,79,112,132],"over":[56],"these":[57],"clusters,":[58],"we":[59],"quantify":[60],"diversity":[62],"justifications":[64],"without":[65],"relying":[66],"on":[67,105],"final":[68],"output":[69],"scores.":[70],"address":[72],"three":[73],"research":[74],"questions:":[75],"(1)":[76],"Does":[77,86],"align":[80],"with":[81,114],"disagreement?":[84],"(2)":[85],"it":[87,94],"generalize":[88],"academic":[90],"subjects?":[91],"(3)":[92],"Is":[93],"sensitive":[95],"structural":[97],"task":[98],"features":[99],"such":[100],"source":[102],"dependency?":[103],"Experiments":[104],"ASAP-SAS":[107],"dataset":[108],"show":[109],"that":[110,138],"correlates":[113],"rater":[115],"disagreement,":[116],"varies":[117],"meaningfully":[118],"subjects,":[120],"increases":[122],"in":[123],"tasks":[124],"requiring":[125],"interpretive":[126],"reasoning.":[127],"Our":[128],"findings":[129],"position":[130],"an":[134],"interpretable":[135],"uncertainty":[136],"signal":[137],"supports":[139],"more":[140],"transparent":[141],"trustworthy":[143],"AI-assisted":[144],"workflows.":[146]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
