{"id":"https://openalex.org/W7161155894","doi":"https://doi.org/10.48550/arxiv.2605.13412","title":"LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics","display_name":"LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161155894","doi":"https://doi.org/10.48550/arxiv.2605.13412"},"language":"en","primary_location":{"id":"pmh:oai:pure.atira.dk:openaire/7fc281db-8b8a-4a4d-b875-191e32165cf6","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/7fc281db-8b8a-4a4d-b875-191e32165cf6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Humblot-Renaux, G, Sabet Jahromi, M N, Bakuri-J\u00f8rgensen, R, Heyl, M A, Stage Jarlner, A S, Vlachou, M, Murphy H\u00f8genhaug, A, Elliott, D, Gammeltoft-Hansen, T & Moeslund, T B 2026, LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics. in 20th Linguistic Annotation Workshop (LAW-XX). Association for Computational Linguistics (ACL).","raw_type":"info:eu-repo/semantics/publishedVersion"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://vbn.aau.dk/da/publications/7fc281db-8b8a-4a4d-b875-191e32165cf6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136097499","display_name":"Galadrielle Humblot-Renaux","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Humblot-Renaux, Galadrielle","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121935830","display_name":"Mohammad N. S. Jahromi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jahromi, Mohammad N. S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136129274","display_name":"Rohat Bakuri-J\u00f8rgensen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bakuri-J\u00f8rgensen, Rohat","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125231240","display_name":"Marieke Anne Heyl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heyl, Marieke Anne","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118988231","display_name":"Asta S. Stage Jarlner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jarlner, Asta S. Stage","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093096673","display_name":"Maria Vlachou","orcid":"https://orcid.org/0009-0008-8685-693X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vlachou, Maria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136149957","display_name":"Anna Murphy H\u00f8genhaug","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H\u00f8genhaug, Anna Murphy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136102708","display_name":"Desmond Elliott","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elliott, Desmond","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136120949","display_name":"Thomas Gammeltoft-Hansen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gammeltoft-Hansen, Thomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136129325","display_name":"Thomas B. Moeslund","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moeslund, Thomas B.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.5181999802589417,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.5181999802589417,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.15469999611377716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.7254999876022339},{"id":"https://openalex.org/keywords/credibility","display_name":"Credibility","score":0.6712999939918518},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5963000059127808},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5353999733924866},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44850000739097595},{"id":"https://openalex.org/keywords/danish","display_name":"Danish","score":0.3961000144481659},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.3817000091075897},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.37869998812675476}],"concepts":[{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.7254999876022339},{"id":"https://openalex.org/C2780224610","wikidata":"https://www.wikidata.org/wiki/Q1530061","display_name":"Credibility","level":2,"score":0.6712999939918518},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6274999976158142},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5963000059127808},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5353999733924866},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5091999769210815},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5051000118255615},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44850000739097595},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4099000096321106},{"id":"https://openalex.org/C164622146","wikidata":"https://www.wikidata.org/wiki/Q9035","display_name":"Danish","level":2,"score":0.3961000144481659},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.3817000091075897},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.37869998812675476},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3546999990940094},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3544999957084656},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.33219999074935913},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3215999901294708},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3206000030040741},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C2780608745","wikidata":"https://www.wikidata.org/wiki/Q367293","display_name":"Convention","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2863999903202057},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C124913957","wikidata":"https://www.wikidata.org/wiki/Q1232548","display_name":"Zoom","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C2776548248","wikidata":"https://www.wikidata.org/wiki/Q12621536","display_name":"Judgement","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:pure.atira.dk:openaire/7fc281db-8b8a-4a4d-b875-191e32165cf6","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/7fc281db-8b8a-4a4d-b875-191e32165cf6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Humblot-Renaux, G, Sabet Jahromi, M N, Bakuri-J\u00f8rgensen, R, Heyl, M A, Stage Jarlner, A S, Vlachou, M, Murphy H\u00f8genhaug, A, Elliott, D, Gammeltoft-Hansen, T & Moeslund, T B 2026, LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics. in 20th Linguistic Annotation Workshop (LAW-XX). Association for Computational Linguistics (ACL).","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"doi:10.48550/arxiv.2605.13412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:openaire/7fc281db-8b8a-4a4d-b875-191e32165cf6","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/7fc281db-8b8a-4a4d-b875-191e32165cf6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Humblot-Renaux, G, Sabet Jahromi, M N, Bakuri-J\u00f8rgensen, R, Heyl, M A, Stage Jarlner, A S, Vlachou, M, Murphy H\u00f8genhaug, A, Elliott, D, Gammeltoft-Hansen, T & Moeslund, T B 2026, LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics. in 20th Linguistic Annotation Workshop (LAW-XX). Association for Computational Linguistics (ACL).","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7802317142486572,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Off-the-shelf":[0],"large":[1],"language":[2],"models":[3,80,113],"(LLMs)":[4],"are":[5,177],"increasingly":[6],"used":[7],"to":[8,161],"automate":[9],"text":[10,58],"annotation,":[11],"yet":[12],"their":[13],"effectiveness":[14],"remains":[15],"underexplored":[16],"for":[17,35,86,99,142],"underrepresented":[18],"languages":[19],"and":[20,44,65,72,81,89,96,101,114,127,130,152,158,175],"specialized":[21],"domains":[22],"where":[23],"the":[24,42,92,108,138,150,159,164],"class":[25],"definition":[26],"requires":[27],"subtle":[28],"expert":[29,63],"understanding.":[30],"We":[31,53,76,104],"investigate":[32],"LLM-based":[33],"annotation":[34],"a":[36,56,167],"novel":[37],"legal":[38],"NLP":[39],"task:":[40],"identifying":[41],"presence":[43],"sentiment":[45],"of":[46,94,132,140,145,155,166],"credibility":[47],"assessments":[48],"in":[49,106],"asylum":[50,73,146],"decision":[51],"texts.":[52],"introduce":[54],"RAB-Cred,":[55],"Danish":[57],"classification":[59],"dataset":[60,174],"featuring":[61],"high-quality,":[62],"annotations":[64],"valuable":[66],"metadata":[67],"such":[68],"as":[69],"annotator":[70],"confidence":[71,126],"case":[74],"outcome.":[75],"benchmark":[77],"21":[78],"open-weight":[79],"30":[82],"system-user":[83],"prompt":[84,97],"combinations":[85],"this":[87],"task,":[88],"systematically":[90],"evaluate":[91],"effect":[93],"model":[95],"choice":[98],"zero-shot":[100],"few-shot":[102],"classification.":[103],"zoom":[105],"on":[107],"errors":[109],"made":[110],"by":[111],"top-performing":[112],"prompts,":[115],"investigating":[116],"error":[117],"consistency":[118],"across":[119],"LLMs,":[120],"inter-class":[121],"confusion,":[122],"correlation":[123],"with":[124],"human":[125],"sample-wise":[128],"difficulty":[129],"severity":[131],"LLM":[133,156],"mistakes.":[134],"Our":[135],"results":[136],"confirm":[137],"potential":[139],"LLMs":[141],"cost-effective":[143],"labeling":[144],"decisions,":[147],"but":[148],"highlight":[149],"imperfect":[151],"inconsistent":[153],"nature":[154],"annotators,":[157],"need":[160],"look":[162],"beyond":[163],"predictions":[165],"single,":[168],"arbitrarily":[169],"chosen":[170],"model.":[171],"The":[172],"RAB-Cred":[173],"code":[176],"available":[178],"at":[179],"https://github.com/glhr/RAB-Cred":[180]},"counts_by_year":[],"updated_date":"2026-07-01T08:55:40.977307","created_date":"2026-05-15T00:00:00"}
