{"id":"https://openalex.org/W7133337262","doi":"https://doi.org/10.48550/arxiv.2603.00565","title":"MIDAS: Multi-Image Dispersion and Semantic Reconstruction for Jailbreaking MLLMs","display_name":"MIDAS: Multi-Image Dispersion and Semantic Reconstruction for Jailbreaking MLLMs","publication_year":2026,"publication_date":"2026-02-28","ids":{"openalex":"https://openalex.org/W7133337262","doi":"https://doi.org/10.48550/arxiv.2603.00565"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00565","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122862343","display_name":"Yilian Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Yilian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128029422","display_name":"Xiaojun Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Xiaojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020360628","display_name":"Guoshun Nan","orcid":"https://orcid.org/0000-0002-1987-2736"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nan, Guoshun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128013221","display_name":"Jiuyang Lyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Jiuyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078842655","display_name":"Zhican Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhican","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100713382","display_name":"Tao Guan","orcid":"https://orcid.org/0000-0002-7568-0560"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127944353","display_name":"Shuyuan Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Shuyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048849816","display_name":"Zhongyi Zhai","orcid":"https://orcid.org/0000-0003-4935-3993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhai, Zhongyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128010275","display_name":"Yang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5122862343"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7159000039100647,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7159000039100647,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.08789999783039093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.021800000220537186,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.722599983215332},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6204000115394592},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.5843999981880188},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3921000063419342},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3725999891757965},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3617999851703644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7537000179290771},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.722599983215332},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6204000115394592},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.5843999981880188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4449000060558319},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3921000063419342},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3725999891757965},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C177562468","wikidata":"https://www.wikidata.org/wiki/Q182893","display_name":"Dispersion (optics)","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26190000772476196},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.78076171875}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"have":[5,27],"achieved":[6],"remarkable":[7],"performance":[8,162],"but":[9],"remain":[10],"vulnerable":[11],"to":[12,43,115],"jailbreak":[13,96,164,182],"attacks":[14,183],"that":[15,29,98,176],"can":[16,38],"induce":[17],"harmful":[18,100],"content":[19],"and":[20,67,90,111,131,152,173,186],"undermine":[21],"their":[22],"secure":[23],"deployment.":[24],"Previous":[25],"studies":[26],"shown":[28],"introducing":[30],"additional":[31],"inference":[32],"steps,":[33],"which":[34,61],"disrupt":[35],"security":[36,157],"attention,":[37,158],"make":[39],"MLLMs":[40,174,185],"more":[41,132],"susceptible":[42],"being":[44],"misled":[45],"into":[46,102],"generating":[47],"malicious":[48,119,150],"content.":[49],"However,":[50],"these":[51],"methods":[52],"rely":[53],"on":[54,142],"single-image":[55],"masking":[56],"or":[57],"isolated":[58],"visual":[59,109,143],"cues,":[60],"only":[62],"modestly":[63],"extend":[64],"reasoning":[65,114],"paths":[66],"thus":[68],"achieve":[69],"limited":[70],"effectiveness,":[71],"particularly":[72],"against":[73,165],"strongly":[74],"aligned":[75],"commercial":[76],"closed-source":[77,197],"models.":[78],"To":[79],"address":[80],"this":[81,84,204],"problem,":[82],"in":[83],"paper,":[85],"we":[86],"propose":[87],"Multi-Image":[88],"Dispersion":[89],"Semantic":[91],"Reconstruction":[92],"(MIDAS),":[93],"a":[94],"multimodal":[95],"framework":[97],"decomposes":[99],"semantics":[101,151],"risk-bearing":[103],"subunits,":[104],"disperses":[105],"them":[106],"across":[107,170,195],"multiple":[108],"clues,":[110],"leverages":[112],"cross-image":[113],"gradually":[116],"reconstruct":[117],"the":[118,139,147,155,161,177],"intent,":[120],"thereby":[121,159],"bypassing":[122],"existing":[123],"safety":[124],"mechanisms.":[125],"The":[126],"proposed":[127,178],"MIDAS":[128,179],"enforces":[129],"longer":[130],"structured":[133],"multi-image":[134],"chained":[135],"reasoning,":[136],"substantially":[137],"increases":[138],"model's":[140,156],"reliance":[141],"cues":[144],"while":[145],"delaying":[146],"exposure":[148],"of":[149,163,193],"significantly":[153],"reducing":[154],"improving":[160],"advanced":[166],"MLLMs.":[167,198],"Extensive":[168],"experiments":[169],"different":[171],"datasets":[172],"demonstrate":[175],"outperforms":[180],"state-of-the-art":[181],"for":[184],"achieves":[187],"an":[188],"average":[189],"attack":[190],"success":[191],"rate":[192],"81.46%":[194],"4":[196],"Our":[199],"code":[200],"is":[201],"available":[202],"at":[203],"[link](https://github.com/Winnie-Lian/MIDAS).":[205]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-04T00:00:00"}
