{"id":"https://openalex.org/W7138077102","doi":"https://doi.org/10.1609/aaai.v40i42.40858","title":"Activation Manipulation Attack: Penetrating and Harmful Jailbreak Attack Against Large Vision-Language Models","display_name":"Activation Manipulation Attack: Penetrating and Harmful Jailbreak Attack Against Large Vision-Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138077102","doi":"https://doi.org/10.1609/aaai.v40i42.40858"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i42.40858","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i42.40858","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i42.40858","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129645839","display_name":"Haojie Hao","orcid":null},"institutions":[{"id":"https://openalex.org/I188522409","display_name":"Critical Software (Portugal)","ror":"https://ror.org/03er2hr05","country_code":"PT","type":"company","lineage":["https://openalex.org/I188522409"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN","PT"],"is_corresponding":true,"raw_author_name":"Haojie Hao","raw_affiliation_strings":["State Key Laboratory of Complex & Critical Software Environment, Beihang University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex & Critical Software Environment, Beihang University","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I188522409"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129658927","display_name":"Jiakai Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiakai Wang","raw_affiliation_strings":["Zhongguancun Laboratory"],"affiliations":[{"raw_affiliation_string":"Zhongguancun Laboratory","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129699985","display_name":"Aishan Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I188522409","display_name":"Critical Software (Portugal)","ror":"https://ror.org/03er2hr05","country_code":"PT","type":"company","lineage":["https://openalex.org/I188522409"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN","PT"],"is_corresponding":false,"raw_author_name":"Aishan Liu","raw_affiliation_strings":["State Key Laboratory of Complex & Critical Software Environment, Beihang University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex & Critical Software Environment, Beihang University","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I188522409"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101598711","display_name":"Yuqing Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105595","display_name":"Institute of Art","ror":"https://ror.org/017fyx225","country_code":"PL","type":"facility","lineage":["https://openalex.org/I4210105595","https://openalex.org/I99542240"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN","PL"],"is_corresponding":false,"raw_author_name":"Yuqing Ma","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I4210105595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044945190","display_name":"Haotong Qin","orcid":"https://orcid.org/0000-0001-7391-7539"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Haotong Qin","raw_affiliation_strings":["Department of Information Technology and Electrical Engineering, ETH Zurich"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology and Electrical Engineering, ETH Zurich","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129665829","display_name":"Yuanfang Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I188522409","display_name":"Critical Software (Portugal)","ror":"https://ror.org/03er2hr05","country_code":"PT","type":"company","lineage":["https://openalex.org/I188522409"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN","PT"],"is_corresponding":false,"raw_author_name":"Yuanfang Guo","raw_affiliation_strings":["State Key Laboratory of Complex & Critical Software Environment, Beihang University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex & Critical Software Environment, Beihang University","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I188522409"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129686880","display_name":"Xianglong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianglong Liu","raw_affiliation_strings":["State Key Laboratory of Complex & Critical Software Environment, Beihang University\nZhongguancun Laboratory\nInstitute of Dataspace, Hefei, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex & Critical Software Environment, Beihang University\nZhongguancun Laboratory\nInstitute of Dataspace, Hefei, China","institution_ids":["https://openalex.org/I4210128818"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129645839"],"corresponding_institution_ids":["https://openalex.org/I188522409","https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2987152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"42","first_page":"35481","last_page":"35489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9491000175476074,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9491000175476074,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.007600000128149986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.004999999888241291,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/camouflage","display_name":"Camouflage","score":0.6535000205039978},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5023000240325928},{"id":"https://openalex.org/keywords/framing","display_name":"Framing (construction)","score":0.37860000133514404},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.3700999915599823},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.3199999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6739000082015991},{"id":"https://openalex.org/C2776196576","wikidata":"https://www.wikidata.org/wiki/Q196113","display_name":"Camouflage","level":2,"score":0.6535000205039978},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5023000240325928},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.44040000438690186},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.34220001101493835},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3188999891281128},{"id":"https://openalex.org/C80107235","wikidata":"https://www.wikidata.org/wiki/Q7162625","display_name":"Penetration (warfare)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C3017997152","wikidata":"https://www.wikidata.org/wiki/Q814610","display_name":"Emergency response","level":2,"score":0.25940001010894775}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i42.40858","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i42.40858","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i42.40858","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i42.40858","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.805422842502594,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"Large":[1],"Vision-Language":[2],"Models":[3],"(LVLMs)":[4],"have":[5],"been":[6],"demonstrated":[7],"to":[8,11,21,38,55,127,166],"be":[9],"vulnerable":[10],"jailbreak":[12,30,160],"attacks,":[13],"highlighting":[14],"the":[15,43,69,82,92,112,152,176],"urgent":[16],"need":[17],"for":[18],"further":[19],"research":[20],"comprehensively":[22],"identify":[23],"and":[24,59,84,95],"mitigate":[25],"these":[26],"threats.":[27],"Unfortunately,":[28],"existing":[29],"studies":[31],"primarily":[32],"focus":[33],"on":[34,183],"coarse-grained":[35],"input":[36],"manipulation":[37],"elicit":[39],"specific":[40],"responses,":[41],"overlooking":[42],"exploitation":[44],"of":[45,87,97,178,201],"internal":[46],"representations,":[47],"i.e.,":[48],"intermediate":[49],"activations,":[50],"which":[51,75,158],"constrains":[52],"their":[53],"ability":[54],"penetrate":[56],"alignment":[57],"safeguards":[58],"generate":[60],"harmful":[61],"responses.":[62,180],"To":[63,99,140],"tackle":[64],"this":[65],"issue,":[66],"we":[67,103,143],"propose":[68],"Activation":[70],"Manipulation":[71],"(ActMan)":[72],"Attack":[73],"framework,":[74],"performs":[76],"fine-grained":[77],"activation":[78,130,164],"manipulations":[79],"inspired":[80,110],"by":[81,111],"perception":[83],"cognition":[85],"stages":[86],"human":[88,116,156],"decision-making,":[89],"enhancing":[90],"both":[91],"penetration":[93,101],"capability":[94],"harmfulness":[96,177],"attacks.":[98],"improve":[100],"capability,":[102],"introduce":[104],"a":[105,121,145],"Deceptive":[106],"Visual":[107],"Camouflage":[108],"module":[109,119,149],"masking":[113],"effect":[114,154],"in":[115,155],"perception.":[117],"This":[118],"uses":[120],"benign":[122],"activation-guided":[123],"attention":[124],"redirection":[125],"strategy":[126],"conceal":[128],"abnormal":[129],"patterns,":[131],"thereby":[132,174],"suppressing":[133],"LVLM's":[134],"defense":[135],"detection":[136],"during":[137,171],"early-stage":[138],"decoding.":[139],"enhance":[141],"harmfulness,":[142],"design":[144],"Malicious":[146],"Semantic":[147],"Induction":[148],"drawing":[150],"from":[151],"framing":[153],"cognition,":[157],"reconstructs":[159],"instructions":[161],"using":[162],"malicious":[163],"guidance":[165],"change":[167],"LVLM\u2019s":[168],"risk":[169],"assessment":[170],"late-stage":[172],"decoding,":[173],"amplifying":[175],"model":[179],"Extensive":[181],"experiments":[182],"six":[184],"mainstream":[185],"LVLMs":[186],"demonstrate":[187],"that":[188],"our":[189],"method":[190],"remarkably":[191],"outperforms":[192],"state-of-the-art":[193],"baselines,":[194],"achieving":[195],"an":[196],"average":[197],"relative":[198],"ASR":[199],"improvement":[200],"12.06%.":[202]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
