{"id":"https://openalex.org/W4416966012","doi":"https://doi.org/10.1109/tmm.2025.3639961","title":"Invisible Backdoor Attack With Siamese Tuning on Pre-Trained Vision-Language Models","display_name":"Invisible Backdoor Attack With Siamese Tuning on Pre-Trained Vision-Language Models","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W4416966012","doi":"https://doi.org/10.1109/tmm.2025.3639961"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2025.3639961","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3639961","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100382479","display_name":"Bing Wang","orcid":"https://orcid.org/0000-0002-0180-5623"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bing Wang","raw_affiliation_strings":["School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China","School of Computer Science and Engineering, Tianjin University of Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China","institution_ids":["https://openalex.org/I136765683"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Tianjin University of Technology, China","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073601707","display_name":"Shengsheng Qian","orcid":"https://orcid.org/0000-0001-9488-2208"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengsheng Qian","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022636178","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100382479"],"corresponding_institution_ids":["https://openalex.org/I136765683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21219459,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"1663","last_page":"1676"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.3160000145435333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.3160000145435333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13850000500679016,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1256999969482422,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backdoor","display_name":"Backdoor","score":0.9944999814033508},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6395999789237976},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.44510000944137573},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.4307999908924103},{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.31690001487731934},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.302700012922287}],"concepts":[{"id":"https://openalex.org/C2781045450","wikidata":"https://www.wikidata.org/wiki/Q254569","display_name":"Backdoor","level":2,"score":0.9944999814033508},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7949000000953674},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6395999789237976},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5169000029563904},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.4307999908924103},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.41830000281333923},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38749998807907104},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27320000529289246},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.26010000705718994}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3639961","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3639961","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W2891874923","https://openalex.org/W2934843808","https://openalex.org/W2963534994","https://openalex.org/W2970231061","https://openalex.org/W2986013765","https://openalex.org/W2990270730","https://openalex.org/W3042368254","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3128339783","https://openalex.org/W3180353325","https://openalex.org/W3189812816","https://openalex.org/W3198377975","https://openalex.org/W3201927751","https://openalex.org/W3215966579","https://openalex.org/W4221086901","https://openalex.org/W4225166070","https://openalex.org/W4225832925","https://openalex.org/W4285606530","https://openalex.org/W4286611322","https://openalex.org/W4294409011","https://openalex.org/W4312480274","https://openalex.org/W4312569687","https://openalex.org/W4312655527","https://openalex.org/W4312715548","https://openalex.org/W4312818263","https://openalex.org/W4378697123","https://openalex.org/W4384659783","https://openalex.org/W4385570650","https://openalex.org/W4386065385","https://openalex.org/W4386065388","https://openalex.org/W4386523254","https://openalex.org/W4387272106","https://openalex.org/W4399487370","https://openalex.org/W4399568683","https://openalex.org/W4400810332","https://openalex.org/W4404563144"],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"pre-trained":[1,98,138],"Vision-Language":[2],"Models":[3],"(VLMs)":[4],"have":[5],"shown":[6],"impressive":[7],"cross-modal":[8],"alignment":[9],"capabilities":[10],"in":[11,60,184],"images":[12],"and":[13,84,86,107,118,168,192],"text":[14],"extraction.":[15],"Despite":[16],"their":[17,28,120],"strengths,":[18],"these":[19,57],"models":[20],"are":[21],"vulnerable":[22],"to":[23,27,52,77,105,149,197],"backdoor":[24,35,132],"attacks":[25,36],"due":[26],"heavy":[29],"reliance":[30],"on":[31,37,137,208],"training":[32],"data.":[33],"Prevailing":[34],"VLMs":[38],"involve":[39],"the":[40,46,50,70,94,97,113,124,152,155,164,170,185,198,202],"injection":[41],"of":[42,96,115,154,201,220],"subtle":[43],"patches":[44],"into":[45],"pre-training":[47,75],"process,":[48],"causing":[49],"model":[51,162,172],"exhibit":[53],"harmful":[54],"behaviors":[55],"when":[56],"triggers":[58],"appear":[59],"test":[61],"images.":[62],"However,":[63],"existing":[64],"attack":[65,133,218],"methods":[66],"typically":[67],"suffer":[68],"from":[69],"following":[71],"limitations:":[72],"(1)":[73],"Polluting":[74],"data":[76],"train":[78],"a":[79,129,143,160,174,216],"poisoned":[80,175],"VLM":[81,157,187],"is":[82,182],"expensive":[83],"time-consuming,":[85],"this":[87],"extra":[88],"retraining":[89],"phase":[90],"could":[91],"potentially":[92],"harm":[93],"performance":[95,219],"VLM;":[99],"(2)":[100],"Backdoor":[101],"triggers,":[102],"often":[103],"visible":[104],"humans":[106],"requiring":[108],"elaborate":[109],"placements,":[110],"significantly":[111],"raise":[112],"risk":[114],"being":[116],"detected":[117],"compromise":[119],"feasibility.":[121],"To":[122],"overcome":[123],"above":[125],"limitations,":[126],"we":[127,141],"propose":[128],"novel":[130],"invisible":[131],"with":[134,163,173],"Siamese":[135,144,161,171],"tuning":[136],"VLMs.":[139,225],"Specifically,":[140],"design":[142],"Tuning":[145],"Attack":[146],"(SiTA)":[147],"method":[148],"subtly":[150],"manipulate":[151],"behavior":[153],"target":[156],"by":[158],"parallelizing":[159],"original":[165],"image":[166,199],"encoder":[167,200],"fine-tuning":[169],"dataset.":[176],"Furthermore,":[177],"an":[178],"imperceptible":[179],"frequency-domain":[180],"trigger":[181],"employed":[183],"targeted":[186],"attack,":[188],"enhancing":[189],"its":[190],"robustness":[191],"feasibility":[193],"without":[194],"necessitating":[195],"alterations":[196],"initial":[203],"model.":[204],"Extensive":[205],"experiments":[206],"conducted":[207],"three":[209],"datasets":[210],"across":[211],"multiple":[212],"downstream":[213],"tasks":[214],"demonstrate":[215],"remarkable":[217],"our":[221],"proposed":[222],"SiTA":[223],"against":[224]},"counts_by_year":[],"updated_date":"2026-03-09T07:00:12.390032","created_date":"2025-12-03T00:00:00"}
