{"id":"https://openalex.org/W4404344583","doi":"https://doi.org/10.48550/arxiv.2411.00034","title":"Is Our Chatbot Telling Lies? Assessing Correctness of an LLM-based Dutch Support Chatbot","display_name":"Is Our Chatbot Telling Lies? Assessing Correctness of an LLM-based Dutch Support Chatbot","publication_year":2024,"publication_date":"2024-10-29","ids":{"openalex":"https://openalex.org/W4404344583","doi":"https://doi.org/10.48550/arxiv.2411.00034"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.00034","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00034","pdf_url":"https://arxiv.org/pdf/2411.00034","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.00034","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114634700","display_name":"Herman Lassche","orcid":"https://orcid.org/0009-0005-8764-4988"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Lassche, Herman","raw_affiliation_strings":["AFAS Software","University Groningen"],"affiliations":[{"raw_affiliation_string":"AFAS Software","institution_ids":[]},{"raw_affiliation_string":"University Groningen","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006150532","display_name":"Michiel Overeem","orcid":"https://orcid.org/0000-0003-4807-4124"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Overeem, Michiel","raw_affiliation_strings":["AFAS Software"],"affiliations":[{"raw_affiliation_string":"AFAS Software","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076279316","display_name":"Ayushi Rastogi","orcid":"https://orcid.org/0000-0002-0939-6887"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Rastogi, Ayushi","raw_affiliation_strings":["University Groningen"],"affiliations":[{"raw_affiliation_string":"University Groningen","institution_ids":["https://openalex.org/I169381384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5114634700"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9459999799728394,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9459999799728394,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13710","display_name":"European and International Law Studies","score":0.9053000211715698,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chatbot","display_name":"Chatbot","score":0.98460853099823},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6939355731010437},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4739156663417816},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.42765650153160095},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.36416542530059814},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12983855605125427}],"concepts":[{"id":"https://openalex.org/C2779041454","wikidata":"https://www.wikidata.org/wiki/Q870780","display_name":"Chatbot","level":2,"score":0.98460853099823},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6939355731010437},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4739156663417816},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.42765650153160095},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36416542530059814},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12983855605125427}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.00034","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00034","pdf_url":"https://arxiv.org/pdf/2411.00034","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.00034","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.00034","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.00034","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00034","pdf_url":"https://arxiv.org/pdf/2411.00034","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4383501580","https://openalex.org/W4214931137","https://openalex.org/W4313813117","https://openalex.org/W4382052417","https://openalex.org/W3192088754","https://openalex.org/W4387007686","https://openalex.org/W3084631705"],"abstract_inverted_index":{"Companies":[0],"support":[1,40,104,129,166],"their":[2,11],"customers":[3],"using":[4],"live":[5],"chats":[6],"and":[7,54,82,117,202,207,218],"chatbots":[8],"to":[9,19,28,34,43,69,93,122,143,158,210,215],"gain":[10],"loyalty.":[12],"AFAS":[13,107],"is":[14,47,68,80,90],"a":[15,51,76,98,135,200],"Dutch":[16],"company":[17],"aiming":[18],"leverage":[20],"the":[21,66,86,91,95,103,124,127,177,182],"opportunity":[22],"large":[23,77],"language":[24,78,115,217],"models":[25],"(LLMs)":[26],"offer":[27],"answer":[29,73,119],"customer":[30,39,128],"queries":[31],"with":[32,60,213],"minimal":[33,61],"no":[35],"input":[36],"from":[37],"its":[38,44],"team.":[41,130],"Adding":[42],"complexity,":[45],"it":[46,84,140],"unclear":[48],"what":[49],"makes":[50,108],"response":[52,99,137],"correct,":[53],"that":[55],"too":[56],"in":[57,174],"Dutch.":[58],"Further,":[59],"data":[62],"available":[63],"for":[64,184,204],"training,":[65],"challenge":[67],"identify":[70,171],"whether":[71],"an":[72],"generated":[74],"by":[75],"model":[79],"correct":[81],"do":[83],"on":[85,101,113],"fly.":[87],"This":[88,179],"study":[89],"first":[92],"define":[94],"correctness":[96,212],"of":[97,126,176],"based":[100],"how":[102,160],"team":[105],"at":[106],"decisions.":[109],"It":[110],"leverages":[111],"literature":[112],"natural":[114],"generation":[116],"automated":[118,163],"grading":[120],"systems":[121],"automate":[123],"decision-making":[125],"We":[131],"investigated":[132],"questions":[133],"requiring":[134],"binary":[136],"(e.g.,":[138,150],"Would":[139],"be":[141],"possible":[142],"adjust":[144,154],"tax":[145,155],"rates":[146],"manually?)":[147,157],"or":[148,193],"instructions":[149],"How":[151],"would":[152],"I":[153],"rate":[156],"test":[159],"close":[161],"our":[162,188],"approach":[164,169],"reaches":[165],"rating.":[167],"Our":[168],"can":[170],"wrong":[172],"messages":[173],"55\\%":[175],"cases.":[178],"work":[180],"demonstrates":[181],"potential":[183],"automatically":[185],"assessing":[186,205],"when":[187],"chatbot":[189],"may":[190],"provide":[191],"incorrect":[192],"misleading":[194],"answers.":[195],"Specifically,":[196],"we":[197],"contribute":[198],"(1)":[199],"definition":[201],"metrics":[203],"correctness,":[206],"(2)":[208],"suggestions":[209],"improve":[211],"respect":[214],"regional":[216],"question":[219],"type.":[220]},"counts_by_year":[],"updated_date":"2025-11-19T23:35:23.961156","created_date":"2024-11-14T00:00:00"}
