{"id":"https://openalex.org/W4411522891","doi":"https://doi.org/10.1145/3728971","title":"S-Eval: Towards Automated and Comprehensive Safety Evaluation for Large Language Models","display_name":"S-Eval: Towards Automated and Comprehensive Safety Evaluation for Large Language Models","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4411522891","doi":"https://doi.org/10.1145/3728971"},"language":"en","primary_location":{"id":"doi:10.1145/3728971","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728971","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3728971","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105989617","display_name":"X. Q. Yuan","orcid":"https://orcid.org/0009-0002-7391-5917"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaohan Yuan","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016900374","display_name":"Jianqiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinfeng Li","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037566066","display_name":"Dongxia Wang","orcid":"https://orcid.org/0000-0001-9812-3911"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxia Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039044021","display_name":"Yuefeng Chen","orcid":"https://orcid.org/0000-0001-9027-3421"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuefeng Chen","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051298007","display_name":"Xiaofeng Mao","orcid":"https://orcid.org/0000-0003-4486-556X"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Mao","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058197951","display_name":"Longtao Huang","orcid":"https://orcid.org/0000-0002-0517-1592"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longtao Huang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101864441","display_name":"Jialuo Chen","orcid":"https://orcid.org/0000-0003-4322-4285"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialuo Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337747","display_name":"Hui Xue","orcid":"https://orcid.org/0000-0002-2093-2839"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Xue","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705977","display_name":"Xiaoxia Liu","orcid":"https://orcid.org/0009-0003-6662-9020"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxia Liu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030133562","display_name":"Wenhai Wang","orcid":"https://orcid.org/0000-0002-1936-2840"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhai Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000596496","display_name":"Kui Ren","orcid":"https://orcid.org/0000-0003-3441-6277"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kui Ren","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100319491","display_name":"Jingyi Wang","orcid":"https://orcid.org/0000-0001-7113-7635"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyi Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5105989617"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":12.666,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.98326112,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"2","issue":"ISSTA","first_page":"2136","last_page":"2157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9559999704360962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformative-learning","display_name":"Transformative learning","score":0.5902789831161499},{"id":"https://openalex.org/keywords/risk-management","display_name":"Risk management","score":0.4761076867580414},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47198015451431274},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.46338948607444763},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4515109658241272},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.4184054136276245},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.41034409403800964},{"id":"https://openalex.org/keywords/risk-analysis","display_name":"Risk analysis (engineering)","score":0.37633568048477173},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3466665744781494},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26027506589889526},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.1734069585800171},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.1427101492881775}],"concepts":[{"id":"https://openalex.org/C70587473","wikidata":"https://www.wikidata.org/wiki/Q7834111","display_name":"Transformative learning","level":2,"score":0.5902789831161499},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.4761076867580414},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47198015451431274},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.46338948607444763},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4515109658241272},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.4184054136276245},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.41034409403800964},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.37633568048477173},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3466665744781494},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26027506589889526},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.1734069585800171},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.1427101492881775},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3728971","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728971","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3728971","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728971","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1944563941","https://openalex.org/W2010435301","https://openalex.org/W2334513225","https://openalex.org/W2494072813","https://openalex.org/W2759575792","https://openalex.org/W3035025622","https://openalex.org/W3098267758","https://openalex.org/W3174685870","https://openalex.org/W3185341429","https://openalex.org/W4400315206","https://openalex.org/W4400484590","https://openalex.org/W4400941260","https://openalex.org/W4402670748","https://openalex.org/W4404782219","https://openalex.org/W6829339952"],"related_works":["https://openalex.org/W2169196470","https://openalex.org/W3113185420","https://openalex.org/W4237580245","https://openalex.org/W4385368139","https://openalex.org/W3041075136","https://openalex.org/W2285205992","https://openalex.org/W3006961029","https://openalex.org/W2480445825","https://openalex.org/W2153011985","https://openalex.org/W2483482908"],"abstract_inverted_index":{"Generative":[0],"large":[1],"language":[2,8,338],"models":[3,293],"(LLMs)":[4],"have":[5],"revolutionized":[6],"natural":[7],"processing":[9],"with":[10,118,161],"their":[11,289],"transformative":[12],"and":[13,36,49,55,79,137,170,186,213,226,246,263,270,285,305,313,321,337],"emergent":[14],"capabilities.":[15],"However,":[16],"recent":[17],"evidence":[18],"indicates":[19],"that":[20,26],"LLMs":[21,60,233,280,312,367],"can":[22,183,301],"produce":[23],"harmful":[24],"content":[25,88],"violates":[27],"social":[28],"norms,":[29],"raising":[30],"significant":[31,204],"concerns":[32],"regarding":[33],"the":[34,68,86,99,105,162,277,308,327,333,361],"safety":[35,57,94,140,178,188,228,316,322,363],"ethical":[37],"ramifications":[38],"of":[39,59,70,173,194,266,279,311,335,365,370],"deploying":[40],"these":[41],"advanced":[42],"models.":[43],"Thus,":[44],"it":[45,74],"is":[46,152,243],"both":[47],"critical":[48],"imperative":[50],"to":[51,67,83,97,198,307,326,346],"perform":[52],"a":[53,77,111,119,138,171,211,259],"rigorous":[54],"comprehensive":[56,122],"evaluation":[58,364],"before":[61],"deployment.":[62],"Despite":[63],"this":[64],"need,":[65],"owing":[66],"extensiveness":[69],"LLM":[71,87,134,142,149,180,268],"generation":[72,319],"space,":[73],"still":[75],"lacks":[76],"unified":[78],"standardized":[80],"risk":[81,123,164,168,192],"taxonomy":[82],"systematically":[84],"reflect":[85],"safety,":[89,290,342],"as":[90,92,295],"well":[91],"automated":[93,114,245,362],"assessment":[95,262],"techniques":[96],"explore":[98],"potential":[100,267],"risks":[101,222,278],"efficiently.":[102],"To":[103],"bridge":[104],"striking":[106],"gap,":[107],"we":[108,209],"propose":[109],"S-Eval,":[110],"novel":[112,139],"LLM-based":[113,328],"Safety":[115],"Evaluation":[116],"framework":[117],"newly":[120],"defined":[121],"taxonomy.":[124],"S-Eval":[125,201,257,300,352],"incorporates":[126],"two":[127],"key":[128],"components,":[129],"i.e.,":[130],"an":[131],"expert":[132,147],"testing":[133,148],"M":[135,143,150,181,224,235,271],"t":[136,151,225],"critique":[141,179,323],"c":[144,182,236,272],".":[145],"The":[146,177,240],"responsible":[153],"for":[154,190,230,349,360],"automatically":[155],"generating":[156],"test":[157,218,318],"cases":[158,219],"in":[159,203,356,375],"accordance":[160],"proposed":[163],"management":[165],"(including":[166],"8":[167],"dimensions":[169],"total":[172],"102":[174,221],"subdivided":[175],"risks).":[176],"provide":[184],"quantitative":[185],"explainable":[187,284],"evaluations":[189,229],"better":[191,264],"awareness":[193],"LLMs.":[195],"In":[196],"contrast":[197],"prior":[199],"works,":[200],"differs":[202],"ways:":[205],"(i)":[206],"efficient":[207],"\u2013":[208,253,299],"construct":[210],"multi-dimensional":[212],"open-ended":[214],"benchmark":[215],"comprising":[216],"220,000":[217],"across":[220],"utilizing":[223],"conduct":[227],"21":[231],"influential":[232],"via":[234],"on":[237,340],"our":[238,357],"benchmark.":[239],"entire":[241],"process":[242],"fully":[244],"requires":[247],"no":[248],"human":[249],"involvement.":[250],"(ii)":[251],"effective":[252],"extensive":[254],"validations":[255],"show":[256],"facilitates":[258],"more":[260],"thorough":[261],"perception":[265],"risks,":[269],"not":[273],"only":[274],"accurately":[275],"quantifies":[276],"but":[281],"also":[282],"provides":[283],"in-depth":[286],"insights":[287],"into":[288],"surpassing":[291],"comparable":[292],"such":[294],"LLaMA-Guard-2.":[296],"(iii)":[297],"adaptive":[298],"be":[302],"flexibly":[303],"configured":[304],"adapted":[306],"rapid":[309],"evolution":[310],"accompanying":[314],"new":[315],"threats,":[317],"methods":[320,324],"thanks":[325],"architecture.":[329],"We":[330],"further":[331],"study":[332],"impact":[334],"hyper-parameters":[336],"environments":[339],"model":[341],"which":[343],"may":[344],"lead":[345],"promising":[347],"directions":[348],"future":[350],"research.":[351],"has":[353],"been":[354],"deployed":[355],"industrial":[358],"partner":[359],"multiple":[366],"serving":[368],"millions":[369],"users,":[371],"demonstrating":[372],"its":[373],"effectiveness":[374],"real-world":[376],"scenarios.":[377]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
