{"id":"https://openalex.org/W7155074914","doi":"https://doi.org/10.48550/arxiv.2604.16499","title":"HQA-VLAttack: Towards High Quality Adversarial Attack on Vision-Language Pre-Trained Models","display_name":"HQA-VLAttack: Towards High Quality Adversarial Attack on Vision-Language Pre-Trained Models","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7155074914","doi":"https://doi.org/10.48550/arxiv.2604.16499"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16499","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134193549","display_name":"Han Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134194150","display_name":"Jiaqi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiaqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134110169","display_name":"Zhi Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134153967","display_name":"Xiaotong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaotong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134129123","display_name":"Xiaoming Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xiaoming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134118248","display_name":"Fenglong Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Fenglong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134164432","display_name":"Yuanman Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuanman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134209632","display_name":"Hong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5134193549"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7906000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7906000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04190000146627426,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.021299999207258224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.8932999968528748},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.578000009059906},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5386999845504761},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.46149998903274536},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4359000027179718},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.33709999918937683},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3330000042915344}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.8932999968528748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7013000249862671},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.578000009059906},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5386999845504761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4853000044822693},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.46149998903274536},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4359000027179718},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33079999685287476},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30480000376701355},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29339998960494995},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28349998593330383},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27410000562667847},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C65856478","wikidata":"https://www.wikidata.org/wiki/Q3991682","display_name":"Attack model","level":2,"score":0.2678999900817871}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4711686670780182,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Black-box":[0],"adversarial":[1,105,159,175,201],"attack":[2,88,119,215,234],"on":[3,31,51,107,220],"vision-language":[4,108],"pre-trained":[5,109],"models":[6],"is":[7,34,186,194],"a":[8,40,52,97],"practical":[9],"and":[10,15,23,38,117,146,166,203],"challenging":[11],"task,":[12],"as":[13],"text":[14,116,122],"image":[16,118,151,158,174],"perturbations":[17],"need":[18],"to":[19,102,131,171,208],"be":[20,81],"considered":[21],"simultaneously,":[22],"only":[24,39,63],"the":[25,66,87,92,127,133,139,143,147,157,162,173,180,199,214],"predicted":[26],"results":[27,219],"are":[28,44,205],"accessible.":[29],"Research":[30],"this":[32,197],"problem":[33],"in":[35,231],"its":[36],"infancy,":[37],"handful":[41],"of":[42,68,75,115,182,190,233],"methods":[43,48],"available.":[45],"Nevertheless,":[46],"existing":[47],"either":[49],"rely":[50],"complex":[53],"iterative":[54],"cross-search":[55],"strategy,":[56,165],"which":[57,78,113,177],"inevitably":[58,85],"consumes":[59],"numerous":[60],"queries,":[61],"or":[62],"consider":[64],"reducing":[65],"similarity":[67,181],"positive":[69,183],"image-text":[70,184,192],"pairs":[71,185,193],"but":[72],"ignore":[73],"that":[74,179,189,225],"negative":[76,191,210],"ones,":[77],"will":[79],"also":[80],"implicitly":[82],"diminished,":[83],"thus":[84,137],"affecting":[86],"performance.":[89],"To":[90],"alleviate":[91],"above":[93],"issues,":[94],"we":[95],"propose":[96],"simple":[98],"yet":[99],"effective":[100],"framework":[101],"generate":[103,132],"high-quality":[104],"examples":[106],"models,":[110],"named":[111],"HQA-VLAttack,":[112],"consists":[114],"stages.":[120],"For":[121,150],"perturbation":[123,152],"generation,":[124,153],"it":[125,154],"leverages":[126],"counter-fitting":[128],"word":[129,135,145],"vector":[130],"substitute":[134,144],"set,":[136],"guaranteeing":[138],"semantic":[140],"consistency":[141],"between":[142],"original":[148],"word.":[149],"first":[155],"initializes":[156],"example":[160],"via":[161],"layer-importance":[163],"guided":[164],"then":[167],"utilizes":[168],"contrastive":[169],"learning":[170],"optimize":[172],"perturbation,":[176],"ensures":[178],"decreased":[187],"while":[188],"increased.":[195],"In":[196],"way,":[198],"optimized":[200],"images":[202],"texts":[204],"more":[206],"likely":[207],"retrieve":[209],"examples,":[211],"thereby":[212],"enhancing":[213],"success":[216,235],"rate.":[217,236],"Experimental":[218],"three":[221],"benchmark":[222],"datasets":[223],"demonstrate":[224],"HQA-VLAttack":[226],"significantly":[227],"outperforms":[228],"strong":[229],"baselines":[230],"terms":[232]},"counts_by_year":[],"updated_date":"2026-04-22T06:07:44.442478","created_date":"2026-04-22T00:00:00"}
