{"id":"https://openalex.org/W3189711920","doi":"https://doi.org/10.1145/3461702.3462578","title":"The Earth Is Flat and the Sun Is Not a Star: The Susceptibility of GPT-2 to Universal Adversarial Triggers","display_name":"The Earth Is Flat and the Sun Is Not a Star: The Susceptibility of GPT-2 to Universal Adversarial Triggers","publication_year":2021,"publication_date":"2021-07-21","ids":{"openalex":"https://openalex.org/W3189711920","doi":"https://doi.org/10.1145/3461702.3462578","mag":"3189711920"},"language":"en","primary_location":{"id":"doi:10.1145/3461702.3462578","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3461702.3462578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017691448","display_name":"Hunter Scott Heidenreich","orcid":null},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hunter Scott Heidenreich","raw_affiliation_strings":["Drexel University, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Drexel University, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101587041","display_name":"Jake Ryland Williams","orcid":"https://orcid.org/0000-0002-7050-8403"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jake Ryland Williams","raw_affiliation_strings":["Drexel University, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Drexel University, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I72816309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5017691448"],"corresponding_institution_ids":["https://openalex.org/I72816309"],"apc_list":null,"apc_paid":null,"fwci":1.3597,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.84527775,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"566","last_page":"573"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7043680548667908},{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.6516707539558411},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6457715630531311},{"id":"https://openalex.org/keywords/safeguarding","display_name":"Safeguarding","score":0.5059577822685242},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.48053961992263794},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.47681161761283875},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4717833697795868},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.4609999656677246},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.4390106797218323},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.4351794123649597},{"id":"https://openalex.org/keywords/internet-privacy","display_name":"Internet privacy","score":0.4284715950489044},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.36511462926864624},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35414135456085205},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.23013010621070862},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17456412315368652},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.1628732979297638},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12045669555664062}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7043680548667908},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.6516707539558411},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6457715630531311},{"id":"https://openalex.org/C2776743756","wikidata":"https://www.wikidata.org/wiki/Q5097921","display_name":"Safeguarding","level":2,"score":0.5059577822685242},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.48053961992263794},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.47681161761283875},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4717833697795868},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.4609999656677246},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4390106797218323},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.4351794123649597},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.4284715950489044},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.36511462926864624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35414135456085205},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.23013010621070862},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17456412315368652},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1628732979297638},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12045669555664062},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C159110408","wikidata":"https://www.wikidata.org/wiki/Q121176","display_name":"Nursing","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3461702.3462578","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3461702.3462578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W615621364","https://openalex.org/W2107019937","https://openalex.org/W2476960546","https://openalex.org/W2483215953","https://openalex.org/W2769358515","https://openalex.org/W2795038878","https://openalex.org/W2799007037","https://openalex.org/W2799194071","https://openalex.org/W2889624842","https://openalex.org/W2893425640","https://openalex.org/W2897042519","https://openalex.org/W2903795157","https://openalex.org/W2922770828","https://openalex.org/W2926555354","https://openalex.org/W2958608582","https://openalex.org/W2962784628","https://openalex.org/W2962816513","https://openalex.org/W2963661177","https://openalex.org/W2963969878","https://openalex.org/W2972413484","https://openalex.org/W2982756474","https://openalex.org/W3034723486","https://openalex.org/W3037697022","https://openalex.org/W3100279624","https://openalex.org/W3110909889","https://openalex.org/W3133702157"],"related_works":["https://openalex.org/W2389103123","https://openalex.org/W4320165414","https://openalex.org/W2757136988","https://openalex.org/W1599620240","https://openalex.org/W3028445819","https://openalex.org/W3021165149","https://openalex.org/W2966491090","https://openalex.org/W3214110207","https://openalex.org/W3197309300","https://openalex.org/W2985263817"],"abstract_inverted_index":{"This":[0],"work":[1,42,128,160],"considers":[2],"universal":[3],"adversarial":[4,195],"triggers,":[5],"a":[6,111],"method":[7],"of":[8,31,103,113,116,135,155,194],"adversarially":[9],"disrupting":[10],"natural":[11],"language":[12,168],"models,":[13],"and":[14,29,138,153,181],"questions":[15],"if":[16,139,175],"it":[17],"is":[18,178],"possible":[19],"to":[20,24,53,82,89,131,162,172,199,203],"use":[21,130],"such":[22,156],"triggers":[23,47,84],"affect":[25],"both":[26,99],"the":[27,50,63,65,69,75,104,114,117,133,151,164,176,188],"topic":[28],"stance":[30,64],"conditional":[32],"text":[33,55,66],"generation":[34],"models.":[35],"In":[36,149],"considering":[37],"four":[38],"\"controversial\"":[39],"topics,":[40,124],"this":[41,98,159,173,192],"demonstrates":[43],"success":[44],"at":[45],"identifying":[46],"that":[48,126,166],"cause":[49],"GPT-2":[51],"model":[52,177,185],"produce":[54],"about":[56],"targeted":[57],"topics":[58,78],"as":[59,61,100],"well":[60],"influence":[62],"takes":[67],"towards":[68],"topic.":[70],"We":[71,96],"show":[72],"that,":[73],"while":[74],"more":[76,80,90],"fringe":[77],"are":[79,141,170],"challenging":[81],"identify":[83],"for,":[85],"they":[86,140],"do":[87],"appear":[88],"effectively":[91],"discriminate":[92],"aspects":[93],"like":[94],"stance.":[95],"view":[97],"an":[101,157],"indication":[102],"dangerous":[105],"potential":[106,201],"for":[107],"controllability":[108],"and,":[109],"perhaps,":[110],"reflection":[112],"nature":[115,134],"disconnect":[118],"between":[119],"conflicting":[120],"views":[121],"on":[122,146],"these":[123],"something":[125],"future":[127],"could":[129],"question":[132],"filter":[136],"bubbles":[137],"reflected":[142],"within":[143],"models":[144,169],"trained":[145],"internet":[147],"content.":[148],"demonstrating":[150],"feasibility":[152],"ease":[154],"attack,":[158],"seeks":[161],"raise":[163],"awareness":[165],"neural":[167],"susceptible":[171],"influence--even":[174],"already":[179],"deployed":[180],"adversaries":[182],"lack":[183],"internal":[184],"access--and":[186],"advocates":[187],"immediate":[189],"safeguarding":[190],"against":[191],"type":[193],"attack":[196],"in":[197],"order":[198],"prevent":[200],"harm":[202],"human":[204],"users.":[205]},"counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
