{"id":"https://openalex.org/W4412073525","doi":"https://doi.org/10.1145/3748239.3748242","title":"AttackEval: How to Evaluate the Effectiveness of Jailbreak Attacking on Large Language Models","display_name":"AttackEval: How to Evaluate the Effectiveness of Jailbreak Attacking on Large Language Models","publication_year":2025,"publication_date":"2025-07-07","ids":{"openalex":"https://openalex.org/W4412073525","doi":"https://doi.org/10.1145/3748239.3748242"},"language":"en","primary_location":{"id":"doi:10.1145/3748239.3748242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3748239.3748242","pdf_url":null,"source":{"id":"https://openalex.org/S4210176598","display_name":"ACM SIGKDD Explorations Newsletter","issn_l":"1931-0145","issn":["1931-0145","1931-0153"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGKDD Explorations Newsletter","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090044664","display_name":"Dong Shu","orcid":"https://orcid.org/0000-0003-1355-454X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dong Shu","raw_affiliation_strings":["Northwestern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100393899","display_name":"Chong Zhang","orcid":"https://orcid.org/0000-0002-2162-4344"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chong Zhang","raw_affiliation_strings":["University of Liverpool"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Liverpool","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102521857","display_name":"Mingyu Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Mingyu Jin","raw_affiliation_strings":["Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101838110","display_name":"Zihao Zhou","orcid":"https://orcid.org/0000-0002-5306-9656"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zihao Zhou","raw_affiliation_strings":["University of Liverpool"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Liverpool","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031522503","display_name":"Lingyao Li","orcid":"https://orcid.org/0000-0001-5888-8311"},"institutions":[{"id":"https://openalex.org/I2613432","display_name":"University of South Florida","ror":"https://ror.org/032db5x82","country_code":"US","type":"education","lineage":["https://openalex.org/I2613432"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lingyao Li","raw_affiliation_strings":["University of South Florida"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of South Florida","institution_ids":["https://openalex.org/I2613432"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090044664"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.4343,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.98158665,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"27","issue":"1","first_page":"10","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9434999823570251,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12268","display_name":"Deception detection and forensic psychology","score":0.939300000667572,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3957153260707855},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.38178551197052},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.1317751109600067}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3957153260707855},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.38178551197052},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.1317751109600067}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3748239.3748242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3748239.3748242","pdf_url":null,"source":{"id":"https://openalex.org/S4210176598","display_name":"ACM SIGKDD Explorations Newsletter","issn_l":"1931-0145","issn":["1931-0145","1931-0153"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGKDD Explorations Newsletter","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W4226278401","https://openalex.org/W4321650181","https://openalex.org/W4378506863","https://openalex.org/W4380353722","https://openalex.org/W4381586841","https://openalex.org/W4386557007","https://openalex.org/W4386907292","https://openalex.org/W4387436590","https://openalex.org/W4387559087","https://openalex.org/W4387561119","https://openalex.org/W4387635776","https://openalex.org/W4389364428","https://openalex.org/W4391590954","https://openalex.org/W4392222150","https://openalex.org/W4393946427","https://openalex.org/W4394780610","https://openalex.org/W4399317048","https://openalex.org/W4399511904","https://openalex.org/W4401066072","https://openalex.org/W4401657794","https://openalex.org/W6600769105","https://openalex.org/W6851775633","https://openalex.org/W6854692045","https://openalex.org/W6857622890","https://openalex.org/W6858529218","https://openalex.org/W6860697757","https://openalex.org/W6873268653","https://openalex.org/W6874705770"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Jailbreak":[0],"attacks":[1,35],"represent":[2],"one":[3],"of":[4,12,33,47,87,176],"the":[5,10,31,45,52,85,134],"most":[6],"sophisticated":[7],"threats":[8],"to":[9,77],"security":[11],"large":[13],"language":[14],"models":[15],"(LLMs).":[16],"To":[17],"deal":[18],"with":[19,126,138],"such":[20],"risks,":[21],"we":[22,94],"introduce":[23],"an":[24],"innovative":[25],"framework":[26,70],"that":[27,133,157],"can":[28],"help":[29],"evaluate":[30],"effectiveness":[32,89],"jailbreak":[34,104],"on":[36,44],"LLMs.":[37],"Unlike":[38],"traditional":[39,127,162],"binary":[40],"evaluations":[41],"focusing":[42],"solely":[43],"robustness":[46],"LLMs,":[48],"our":[49,113,130,165],"method":[50],"assesses":[51],"attacking":[53],"prompts'":[54],"effectiveness.":[55],"We":[56],"present":[57],"two":[58],"distinct":[59],"evaluation":[60,64,128],"frameworks:":[61],"a":[62,66,72,96,109,118,143,168,173],"coarse-grained":[63],"and":[65,82,116,146],"fine-grained":[67,147],"evaluation.":[68],"Each":[69],"uses":[71],"scoring":[73],"range":[74,175],"from":[75],"0":[76],"1,":[78],"offering":[79,142],"unique":[80],"perspectives":[81],"allowing":[83],"for":[84,103,112,121,171],"assessment":[86],"attack":[88,155,177],"in":[90,161,179],"different":[91],"scenarios.":[92],"Additionally,":[93],"develop":[95],"comprehensive":[97],"ground":[98],"truth":[99],"dataset":[100,107],"specifically":[101],"tailored":[102],"prompts.":[105],"This":[106],"is":[108],"crucial":[110],"benchmark":[111],"current":[114,135],"study":[115,131],"provides":[117],"foundational":[119],"resource":[120],"future":[122],"research.":[123],"By":[124],"comparing":[125],"methods,":[129],"shows":[132],"results":[136],"align":[137],"baseline":[139],"metrics":[140],"while":[141],"more":[144],"nuanced":[145],"assessment.":[148],"It":[149],"also":[150],"helps":[151],"identify":[152],"potentially":[153],"harmful":[154],"prompts":[156,178],"might":[158],"appear":[159],"harmless":[160],"evaluations.":[163],"Overall,":[164],"work":[166],"establishes":[167],"solid":[169],"foundation":[170],"assessing":[172],"broader":[174],"prompt":[180],"injection.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
