{"id":"https://openalex.org/W4405089099","doi":"https://doi.org/10.1109/iccv51701.2025.00740","title":"AdvDreamer Unveils: Are Vision-Language Models Truly Ready for Real-World 3D Variations?","display_name":"AdvDreamer Unveils: Are Vision-Language Models Truly Ready for Real-World 3D Variations?","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4405089099","doi":"https://doi.org/10.1109/iccv51701.2025.00740"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00740","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.03002","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048633696","display_name":"Shouwei Ruan","orcid":"https://orcid.org/0009-0007-0481-5855"},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shouwei Ruan","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101608879","display_name":"Hong Wei Liu","orcid":"https://orcid.org/0000-0001-7116-9952"},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanqing Liu","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115021361","display_name":"Yao Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yao Huang","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100401789","display_name":"Xiaoqi Wang","orcid":"https://orcid.org/0000-0002-9195-6985"},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoqi Wang","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Caixin Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Caixin Kang","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041303642","display_name":"Hang Su","orcid":"https://orcid.org/0000-0003-4072-2529"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Su","raw_affiliation_strings":["Institute for AI, Tsinghua University,Tsinghua-Bosch Joint ML Center, THBI Lab, BNRist Center,Dept. of Comp. Sci. and Tech"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for AI, Tsinghua University,Tsinghua-Bosch Joint ML Center, THBI Lab, BNRist Center,Dept. of Comp. Sci. and Tech","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068755794","display_name":"Yinpeng Dong","orcid":"https://orcid.org/0000-0003-1299-683X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinpeng Dong","raw_affiliation_strings":["College of AI, Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of AI, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079657274","display_name":"Xingxing Wei","orcid":"https://orcid.org/0000-0002-0778-8377"},"institutions":[{"id":"https://openalex.org/I4210122052","display_name":"Virtual Reality Medical Center","ror":"https://ror.org/02rzjbv15","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210122052"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xingxing Wei","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I4210122052"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5048633696"],"corresponding_institution_ids":["https://openalex.org/I4210122052"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00105391,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7894","last_page":"7904"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9200000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3880981504917145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3880981504917145}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00740","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.03002","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.03002","pdf_url":"https://arxiv.org/pdf/2412.03002","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.03002","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.03002","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.03002","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.03002","pdf_url":"https://arxiv.org/pdf/2412.03002","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4405089099.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Vision":[0],"Language":[1],"Models":[2],"(VLMs)":[3],"have":[4],"exhibited":[5],"remarkable":[6],"generalization":[7],"capabilities,":[8],"yet":[9],"their":[10],"robustness":[11,23,166],"in":[12,137],"dynamic":[13],"real-world":[14,25,57,181],"scenarios":[15],"remains":[16],"largely":[17],"unexplored.":[18],"To":[19],"systematically":[20],"evaluate":[21,164],"VLMs'":[22],"to":[24,55,79,105,111,163,188],"3D":[26,40,58,76,169,182],"variations,":[27],"we":[28,49,65,88,122,154],"propose":[29,89],"AdvDreamer,":[30,48],"the":[31,81,124,130,138,145,157],"first":[32,158],"framework":[33],"capable":[34],"of":[35,84,173],"generating":[36],"physically":[37],"reproducible":[38],"Adversarial":[39],"Transformation":[41],"(Adv-3DT)":[42],"samples":[43,148],"from":[44],"single-view":[45],"observations.":[46],"In":[47],"integrate":[50],"three":[51],"key":[52],"innovations:":[53],"Firstly,":[54],"characterize":[56],"variations":[59,183],"with":[60,149,176],"limited":[61],"prior":[62],"knowledge":[63],"precisely,":[64],"design":[66],"a":[67,90],"zero-shot":[68],"Monocular":[69],"Pose":[70],"Manipulation":[71],"pipeline":[72],"built":[73],"upon":[74],"generative":[75],"priors.":[77],"Secondly,":[78],"ensure":[80],"visual":[82],"quality":[83],"worst-case":[85],"Adv-3DT":[86,147],"samples,":[87],"Naturalness":[91],"Reward":[92],"Model":[93],"that":[94,180],"provides":[95],"continuous":[96],"naturalness":[97],"regularization":[98],"during":[99],"adversarial":[100,131],"optimization,":[101],"effectively":[102],"preventing":[103],"convergence":[104],"hallucinated":[106],"or":[107],"unnatural":[108],"elements.":[109],"Thirdly,":[110],"enable":[112],"systematic":[113],"evaluation":[114],"across":[115,191],"diverse":[116],"VLM":[117,165],"architectures":[118,178],"and":[119,152],"visual-language":[120],"tasks,":[121],"introduce":[123],"Inverse":[125],"Semantic":[126],"Probability":[127],"loss":[128],"as":[129],"optimization":[132],"objective,":[133],"which":[134],"solely":[135],"operates":[136],"fundamental":[139],"visual-textual":[140],"alignment":[141],"space.":[142],"Based":[143],"on":[144],"captured":[146],"high":[150],"aggressiveness":[151],"transferability,":[153],"establish":[155],"MM3DTBench,":[156],"VQA":[159],"benchmark":[160],"dataset":[161],"tailored":[162],"under":[167],"challenging":[168],"variations.":[170],"Extensive":[171],"evaluations":[172],"representative":[174],"VLMs":[175],"varying":[177],"reveal":[179],"can":[184],"pose":[185],"severe":[186],"threats":[187],"model":[189],"performance":[190],"various":[192],"tasks.":[193]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
