{"id":"https://openalex.org/W4384918632","doi":"https://doi.org/10.1145/3607199.3607237","title":"Understanding Multi-Turn Toxic Behaviors in Open-Domain Chatbots","display_name":"Understanding Multi-Turn Toxic Behaviors in Open-Domain Chatbots","publication_year":2023,"publication_date":"2023-10-03","ids":{"openalex":"https://openalex.org/W4384918632","doi":"https://doi.org/10.1145/3607199.3607237"},"language":"en","primary_location":{"id":"doi:10.1145/3607199.3607237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3607199.3607237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2307.09579","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076545839","display_name":"Bocheng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bocheng Chen","raw_affiliation_strings":["Michigan State University, USA"],"raw_orcid":"https://orcid.org/0009-0001-0471-7063","affiliations":[{"raw_affiliation_string":"Michigan State University, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083819169","display_name":"Guangjing Wang","orcid":"https://orcid.org/0000-0002-9353-9042"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangjing Wang","raw_affiliation_strings":["Michigan State University, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-9353-9042","affiliations":[{"raw_affiliation_string":"Michigan State University, United States of America","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035545617","display_name":"Hanqing Guo","orcid":"https://orcid.org/0000-0003-3779-4679"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanqing Guo","raw_affiliation_strings":["Michigan State University, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-3779-4679","affiliations":[{"raw_affiliation_string":"Michigan State University, United States of America","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039317227","display_name":"Y. Wang","orcid":"https://orcid.org/0009-0008-2062-9013"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuanda Wang","raw_affiliation_strings":["Michigan State University, United States of America"],"raw_orcid":"https://orcid.org/0009-0008-2062-9013","affiliations":[{"raw_affiliation_string":"Michigan State University, United States of America","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042277127","display_name":"Qiben Yan","orcid":"https://orcid.org/0000-0001-6272-7668"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qiben Yan","raw_affiliation_strings":["Michigan State University, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-6272-7668","affiliations":[{"raw_affiliation_string":"Michigan State University, United States of America","institution_ids":["https://openalex.org/I87216513"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5076545839"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":null,"apc_paid":null,"fwci":2.8969,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.9258351,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"282","last_page":"296"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9142000079154968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.911300003528595,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chatbot","display_name":"Chatbot","score":0.9730130434036255},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.9243991374969482},{"id":"https://openalex.org/keywords/open-domain","display_name":"Open domain","score":0.7394742369651794},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7358880043029785},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5765347480773926},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.44841068983078003},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.44384053349494934},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4157698452472687},{"id":"https://openalex.org/keywords/turn-taking","display_name":"Turn-taking","score":0.41394466161727905},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3410928249359131},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.1689983308315277},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1424328088760376},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.14174634218215942}],"concepts":[{"id":"https://openalex.org/C2779041454","wikidata":"https://www.wikidata.org/wiki/Q870780","display_name":"Chatbot","level":2,"score":0.9730130434036255},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.9243991374969482},{"id":"https://openalex.org/C2993776861","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Open domain","level":3,"score":0.7394742369651794},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7358880043029785},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5765347480773926},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.44841068983078003},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.44384053349494934},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4157698452472687},{"id":"https://openalex.org/C2776352735","wikidata":"https://www.wikidata.org/wiki/Q2313343","display_name":"Turn-taking","level":3,"score":0.41394466161727905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3410928249359131},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.1689983308315277},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1424328088760376},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.14174634218215942},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3607199.3607237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3607199.3607237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2307.09579","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.09579","pdf_url":"https://arxiv.org/pdf/2307.09579","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2307.09579","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.09579","pdf_url":"https://arxiv.org/pdf/2307.09579","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2704182886","display_name":null,"funder_award_id":"76421595","funder_id":"https://openalex.org/F4320307791","funder_display_name":"Cisco Systems"},{"id":"https://openalex.org/G3092172996","display_name":null,"funder_award_id":"CNS-1950171","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7393879419","display_name":"SaTC: CORE: Small: URadio: Towards Secure Smart Home IoT Communication Using Hybrid Ultrasonic-RF Radio","funder_award_id":"1950171","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307791","display_name":"Cisco Systems","ror":"https://ror.org/03yt1ez60"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4384918632.pdf","grobid_xml":"https://content.openalex.org/works/W4384918632.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2540646130","https://openalex.org/W2604799547","https://openalex.org/W2741321799","https://openalex.org/W2785896739","https://openalex.org/W2791017690","https://openalex.org/W2808135558","https://openalex.org/W2895814682","https://openalex.org/W2896457183","https://openalex.org/W2900796872","https://openalex.org/W2907283777","https://openalex.org/W2912102236","https://openalex.org/W2913843539","https://openalex.org/W2922580172","https://openalex.org/W2926555354","https://openalex.org/W2949089361","https://openalex.org/W2963456134","https://openalex.org/W2971173235","https://openalex.org/W2971307358","https://openalex.org/W2971365351","https://openalex.org/W2982756474","https://openalex.org/W2988937804","https://openalex.org/W2993013587","https://openalex.org/W2997763445","https://openalex.org/W3002330681","https://openalex.org/W3011309387","https://openalex.org/W3034600233","https://openalex.org/W3046747294","https://openalex.org/W3100355250","https://openalex.org/W3155584966","https://openalex.org/W3171850892","https://openalex.org/W3184324824","https://openalex.org/W4206637810","https://openalex.org/W4285163447","https://openalex.org/W4287900772","https://openalex.org/W4292779060","https://openalex.org/W4295106014","https://openalex.org/W4296413526","https://openalex.org/W4308391526","https://openalex.org/W4321472314","https://openalex.org/W4376163495","https://openalex.org/W4379087432","https://openalex.org/W4380433161","https://openalex.org/W4381245692","https://openalex.org/W4385570982","https://openalex.org/W4385571158","https://openalex.org/W4385571232","https://openalex.org/W4385571804","https://openalex.org/W4385894687","https://openalex.org/W4386246835","https://openalex.org/W6778883912"],"related_works":["https://openalex.org/W3176963395","https://openalex.org/W2228992124","https://openalex.org/W2613954729","https://openalex.org/W3130118443","https://openalex.org/W1955947659","https://openalex.org/W2759378137","https://openalex.org/W2376974064","https://openalex.org/W3188579947","https://openalex.org/W2947270058","https://openalex.org/W4289529495"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,23,73,97,143,161,169,193,218],"natural":[3],"language":[4],"processing":[5],"and":[6,207,214,221],"machine":[7],"learning":[8],"have":[9],"led":[10],"to":[11,36,95,139,176,189,209],"the":[12,31,65,148,162,167,174,223],"development":[13],"of":[14,33,64,111,225],"chatbot":[15,94,105,134,191],"models,":[16],"such":[17],"as":[18],"ChatGPT,":[19],"that":[20,62,69,132,184],"can":[21,136,201],"engage":[22,96],"conversational":[24,219],"dialogue":[25,220],"with":[26,99,108,119],"human":[27],"users.":[28,229],"However,":[29],"understanding":[30],"ability":[32],"these":[34],"models":[35,135],"generate":[37,140],"toxic":[38,71,141,216],"or":[39],"harmful":[40],"responses":[41,142,217],"during":[42],"a":[43,74,87,93,100,109,120,123,144,153,170,194],"non-toxic":[44,67],"multi-turn":[45,145],"conversation":[46,75,98,113,117,159],"remains":[47],"an":[48],"open":[49],"research":[50,53,186],"problem.":[51],"Existing":[52],"focuses":[54],"on":[55],"single-turn":[56],"sentence":[57,121],"testing,":[58],"while":[59],"we":[60,85],"find":[61],"82%":[63],"individual":[66],"sentences":[68,126],"elicit":[70],"behaviors":[72],"are":[76],"considered":[77],"safe":[78],"by":[79,91,204],"existing":[80],"tools.":[81],"In":[82,147],"this":[83],"paper,":[84],"design":[86],"new":[88],"attack,":[89],"ToxicChat,":[90],"fine-tuning":[92,163],"target":[101],"open-domain":[102,133],"chatbot.":[103],"The":[104,158,198],"is":[106,187],"fine-tuned":[107],"collection":[110],"crafted":[112,124],"sequences.":[114],"Particularly,":[115],"each":[116],"begins":[118],"from":[122],"prompt":[125],"dataset.":[127],"Our":[128,181],"extensive":[129],"evaluation":[130],"shows":[131],"be":[137,202],"triggered":[138],"conversation.":[146],"best":[149],"scenario,":[150],"ToxicChat":[151,200],"achieves":[152],"67%":[154],"toxicity":[155,168,192],"activation":[156],"rate.":[157],"sequences":[160],"stage":[164],"help":[165],"trigger":[166],"conversation,":[171],"which":[172],"allows":[173],"attack":[175],"bypass":[177],"two":[178],"defense":[179],"methods.":[180],"findings":[182],"suggest":[183],"further":[185],"needed":[188],"address":[190],"dynamic":[195],"interactive":[196],"environment.":[197],"proposed":[199],"used":[203],"both":[205],"industry":[206],"researchers":[208],"develop":[210],"methods":[211],"for":[212,227],"detecting":[213],"mitigating":[215],"improve":[222],"robustness":[224],"chatbots":[226],"end":[228]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
