{"id":"https://openalex.org/W7077077966","doi":"https://doi.org/10.48550/arxiv.2507.20704","title":"Text2VLM: Adapting Text-Only Datasets to Evaluate Alignment Training in Visual Language Models","display_name":"Text2VLM: Adapting Text-Only Datasets to Evaluate Alignment Training in Visual Language Models","publication_year":2025,"publication_date":"2025-07-28","ids":{"openalex":"https://openalex.org/W7077077966","doi":"https://doi.org/10.48550/arxiv.2507.20704"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2507.20704","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20704","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2507.20704","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Downer, Gabriel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Downer, Gabriel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Craven, Sean","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Craven, Sean","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ruck, Damian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruck, Damian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Thomas, Jake","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas, Jake","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12773","display_name":"Water Quality and Resources Studies","score":0.03750000149011612,"subfield":{"id":"https://openalex.org/subfields/2312","display_name":"Water Science and Technology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12773","display_name":"Water Quality and Resources Studies","score":0.03750000149011612,"subfield":{"id":"https://openalex.org/subfields/2312","display_name":"Water Science and Technology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13370","display_name":"Diverse Scientific and Economic Studies","score":0.03150000050663948,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13398","display_name":"Data Analysis with R","score":0.02879999950528145,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.7354000210762024},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7164999842643738},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5095000267028809},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.47850000858306885},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4375},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.428600013256073},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.3986000120639801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8151000142097473},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.7354000210762024},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7164999842643738},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.47850000858306885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44749999046325684},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4375},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.3986000120639801},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3652999997138977},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3386000096797943},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3384999930858612},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2800999879837036},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2507.20704","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20704","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2507.20704","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20704","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,69],"increasing":[1],"integration":[2],"of":[3,62,95,141,167,178,190],"Visual":[4],"Language":[5],"Models":[6],"(VLMs)":[7],"into":[8,53,82],"AI":[9],"systems":[10],"necessitates":[11],"robust":[12,169],"model":[13],"alignment,":[14],"especially":[15],"when":[16,105],"handling":[17],"multimodal":[18,54,88,179],"content":[19,74],"that":[20,49],"combines":[21],"text":[22,78,145],"and":[23,79,147],"images.":[24],"Existing":[25],"evaluation":[26,94,177],"datasets":[27,52],"heavily":[28],"lean":[29],"towards":[30],"text-only":[31,51],"prompts,":[32],"leaving":[33],"visual":[34,106],"vulnerabilities":[35],"under":[36],"evaluated.":[37],"To":[38],"address":[39],"this":[40],"gap,":[41],"we":[42],"propose":[43],"\\textbf{Text2VLM},":[44],"a":[45,83,87,123,156,183],"novel":[46],"multi-stage":[47],"pipeline":[48,71],"adapts":[50],"formats,":[55],"specifically":[56],"designed":[57],"to":[58,102,122,128,164],"evaluate":[59],"the":[60,76,114,139,165,176,187],"resilience":[61],"VLMs":[63,97,191],"against":[64],"typographic":[65,84],"prompt":[66,89,103],"injection":[67,104],"attacks.":[68],"Text2VLM":[70,134,154,181],"identifies":[72],"harmful":[73],"in":[75,113,120,185,192],"original":[77],"converts":[80],"it":[81],"image,":[85],"creating":[86],"for":[90,159,172],"VLMs.":[91,173],"Also,":[92],"our":[93],"open-source":[96],"highlights":[98],"their":[99],"increased":[100],"susceptibility":[101],"inputs":[107],"are":[108],"introduced,":[109],"revealing":[110],"critical":[111],"weaknesses":[112],"current":[115],"models'":[116],"alignment.":[117],"This":[118],"is":[119],"addition":[121],"significant":[124],"performance":[125],"gap":[126],"compared":[127],"closed-source":[129],"frontier":[130],"models.":[131],"We":[132],"validate":[133],"through":[135],"human":[136,152],"evaluations,":[137],"ensuring":[138],"alignment":[140],"extracted":[142],"salient":[143],"concepts;":[144],"summarization":[146],"output":[148],"classification":[149],"align":[150],"with":[151],"expectations.":[153],"provides":[155],"scalable":[157],"tool":[158],"comprehensive":[160],"safety":[161,170],"assessment,":[162],"contributing":[163],"development":[166],"more":[168],"mechanisms":[171],"By":[174],"enhancing":[175],"vulnerabilities,":[180],"plays":[182],"role":[184],"advancing":[186],"safe":[188],"deployment":[189],"diverse,":[193],"real-world":[194],"applications.":[195]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
