{"id":"https://openalex.org/W7155621649","doi":"https://doi.org/10.1145/3785022.3785070","title":"Enhancing LLM-Based Data Annotation with Error Decomposition","display_name":"Enhancing LLM-Based Data Annotation with Error Decomposition","publication_year":2026,"publication_date":"2026-04-25","ids":{"openalex":"https://openalex.org/W7155621649","doi":"https://doi.org/10.1145/3785022.3785070"},"language":null,"primary_location":{"id":"doi:10.1145/3785022.3785070","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3785022.3785070","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the LAK26: 16th International Learning Analytics and Knowledge Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3785022.3785070","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103027897","display_name":"Zhen Xu","orcid":"https://orcid.org/0009-0004-3131-910X"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhen Xu","raw_affiliation_strings":["Columbia University, New York, USA"],"raw_orcid":"https://orcid.org/0009-0004-3131-910X","affiliations":[{"raw_affiliation_string":"Columbia University, New York, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035648371","display_name":"Vedant Khatri","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vedant Khatri","raw_affiliation_strings":["University of California, Irvine, Irvine, USA"],"raw_orcid":"https://orcid.org/0000-0002-4275-0745","affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123488504","display_name":"Yijun Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yijun Dai","raw_affiliation_strings":["Columbia University, New York, USA"],"raw_orcid":"https://orcid.org/0009-0000-7258-8374","affiliations":[{"raw_affiliation_string":"Columbia University, New York, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013458346","display_name":"Xiner Liu","orcid":"https://orcid.org/0009-0004-3796-2251"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]},{"id":"https://openalex.org/I922845939","display_name":"Philadelphia University","ror":"https://ror.org/03zzmyz63","country_code":"US","type":"education","lineage":["https://openalex.org/I922845939"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiner Liu","raw_affiliation_strings":["University of Pennsylvania, Philadelphia, USA"],"raw_orcid":"https://orcid.org/0009-0004-3796-2251","affiliations":[{"raw_affiliation_string":"University of Pennsylvania, Philadelphia, USA","institution_ids":["https://openalex.org/I922845939","https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035805420","display_name":"S Li","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siyan Li","raw_affiliation_strings":["Columbia University, New York, USA"],"raw_orcid":"https://orcid.org/0009-0002-1465-6337","affiliations":[{"raw_affiliation_string":"Columbia University, New York, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056367146","display_name":"Xuanming Zhang","orcid":"https://orcid.org/0000-0003-2776-0344"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuanming Zhang","raw_affiliation_strings":["Columbia University, New York, USA"],"raw_orcid":"https://orcid.org/0000-0003-2776-0344","affiliations":[{"raw_affiliation_string":"Columbia University, New York, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047810054","display_name":"Renzhe Yu","orcid":"https://orcid.org/0000-0002-2375-3537"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Renzhe Yu","raw_affiliation_strings":["Columbia University, New York, USA"],"raw_orcid":"https://orcid.org/0000-0002-2375-3537","affiliations":[{"raw_affiliation_string":"Columbia University, New York, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103027897"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95623067,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"325","last_page":"335"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.13840000331401825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.13840000331401825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09019999951124191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.06599999964237213,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.9384999871253967},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.6919000148773193},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3840000033378601},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3799999952316284},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.3619000017642975},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.34290000796318054},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.3228999972343445},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3050000071525574}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.9384999871253967},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.6919000148773193},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6883000135421753},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5236999988555908},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5218999981880188},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.424699991941452},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3840000033378601},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37439998984336853},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.34290000796318054},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.3228999972343445},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C7044111","wikidata":"https://www.wikidata.org/wiki/Q15844891","display_name":"Temporal annotation","level":5,"score":0.29179999232292175},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2653999924659729},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.2549000084400177},{"id":"https://openalex.org/C13606891","wikidata":"https://www.wikidata.org/wiki/Q2623243","display_name":"Conceptual model","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3785022.3785070","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3785022.3785070","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the LAK26: 16th International Learning Analytics and Knowledge Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3785022.3785070","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3785022.3785070","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the LAK26: 16th International Learning Analytics and Knowledge Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5813878178596497}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1515847863","https://openalex.org/W2002097807","https://openalex.org/W2013378101","https://openalex.org/W2034059165","https://openalex.org/W2037789405","https://openalex.org/W2049390752","https://openalex.org/W2057720927","https://openalex.org/W2141966633","https://openalex.org/W2143747826","https://openalex.org/W2147638277","https://openalex.org/W2251930319","https://openalex.org/W2252039660","https://openalex.org/W2291955298","https://openalex.org/W3001362559","https://openalex.org/W3098042509","https://openalex.org/W4303614602","https://openalex.org/W4313379915","https://openalex.org/W4380763235","https://openalex.org/W4384662964","https://openalex.org/W4385571451","https://openalex.org/W4387824566","https://openalex.org/W4387851331","https://openalex.org/W4388552174","https://openalex.org/W4389518941","https://openalex.org/W4389636360","https://openalex.org/W4392943286","https://openalex.org/W4396786654","https://openalex.org/W4396806295","https://openalex.org/W4398150831","https://openalex.org/W4403970100","https://openalex.org/W4404783774","https://openalex.org/W4405654390","https://openalex.org/W4405931175","https://openalex.org/W4408504572","https://openalex.org/W4411119592","https://openalex.org/W4411439845","https://openalex.org/W4412966388","https://openalex.org/W4416034648","https://openalex.org/W7131855570","https://openalex.org/W7133232943"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"offer":[4],"a":[5,67,92,98,138,161,175,239,248],"scalable":[6],"alternative":[7],"to":[8,56,101,166,178],"human":[9,163],"coding":[10],"for":[11,210,251,258],"data":[12],"annotation":[13,36,42,64,110,125,144,164,182,194,219,253],"tasks,":[14,37,43,126,195],"enabling":[15],"the":[16,228,245],"scale-up":[17],"of":[18,79,114,230,247],"research":[19],"across":[20],"data-intensive":[21],"domains":[22],"such":[23,44],"as":[24,45],"learning":[25],"analytics.":[26],"While":[27],"LLMs":[28],"are":[29,128],"already":[30],"achieving":[31],"near-human":[32],"accuracy":[33],"on":[34,40,123,191],"objective":[35],"their":[38,115],"performance":[39],"subjective":[41,131],"those":[46],"involving":[47],"psychological":[48],"constructs,":[49],"is":[50,215],"less":[51],"consistent":[52],"and":[53,108,153,173,201,221,254],"more":[54],"prone":[55],"errors.":[57],"Standard":[58],"evaluation":[59,94],"practices":[60],"typically":[61],"collapse":[62],"all":[63],"errors":[65,80,145,183],"into":[66],"single":[68,223],"alignment":[69,214,224],"metric,":[70],"but":[71],"this":[72,121,189],"simplified":[73],"approach":[74],"may":[75],"obscure":[76],"different":[77,87],"kinds":[78],"that":[81,96,141,243],"affect":[82],"final":[83],"analytical":[84],"conclusions":[85],"in":[86,112,130,217],"ways.":[88],"Here,":[89],"we":[90],"propose":[91],"diagnostic":[93,139,241],"paradigm":[95,122,135,190,236],"incorporates":[97],"human-in-the-loop":[99],"step":[100],"separate":[102],"task-inherent":[103,168],"ambiguity":[104,156,169],"from":[105,170],"model-driven":[106],"inaccuracies":[107],"assess":[109],"quality":[111,229],"terms":[113],"potential":[116],"downstream":[117],"impacts.":[118],"We":[119,187],"refine":[120],"ordinal":[124],"which":[127],"common":[129],"annotation.":[132],"The":[133],"refined":[134],"includes:":[136],"(1)":[137],"taxonomy":[140],"categorizes":[142],"LLM":[143,171,181,231,252],"along":[146],"two":[147],"dimensions:":[148],"source":[149],"(model-specific":[150],"vs.":[151,157],"task-inherent)":[152],"type":[154],"(boundary":[155],"conceptual":[158,199],"misidentification);":[159],"(2)":[160],"lightweight":[162],"test":[165],"estimate":[167],"annotations;":[172],"(3)":[174],"computational":[176],"method":[177],"decompose":[179],"observed":[180],"following":[184],"our":[185,205,235],"taxonomy.":[186],"validate":[188],"four":[192],"educational":[193],"demonstrating":[196],"both":[197],"its":[198],"validity":[200],"practical":[202],"utility.":[203],"Theoretically,":[204],"work":[206],"provides":[207,255],"empirical":[208],"evidence":[209],"why":[211,222],"excessively":[212],"high":[213],"unrealistic":[216],"specific":[218],"tasks":[220],"metrics":[225],"inadequately":[226],"reflect":[227],"annotations.":[232],"In":[233],"practice,":[234],"can":[237],"be":[238],"low-cost":[240],"tool":[242],"assesses":[244],"suitability":[246],"given":[249],"task":[250],"actionable":[256],"insights":[257],"further":[259],"technical":[260],"optimization.":[261]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-26T00:00:00"}
