{"id":"https://openalex.org/W4408355992","doi":"https://doi.org/10.1109/icassp49660.2025.10889297","title":"Robust CLIP-Guided Deep Thinking: A Two-Stage Optimization Strategy for Enhancing Adversarial Robustness and Reliability in LVLMs","display_name":"Robust CLIP-Guided Deep Thinking: A Two-Stage Optimization Strategy for Enhancing Adversarial Robustness and Reliability in LVLMs","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355992","doi":"https://doi.org/10.1109/icassp49660.2025.10889297"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889297","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889297","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107809834","display_name":"Yize Sui","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yize Sui","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020177243","display_name":"Wanrong Huang","orcid":"https://orcid.org/0000-0001-5778-9055"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanrong Huang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016603628","display_name":"Wenjing Yang","orcid":"https://orcid.org/0000-0002-6997-0406"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjing Yang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002574297","display_name":"Chaofan Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaofan Zhao","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100445021","display_name":"Jing Ren","orcid":"https://orcid.org/0000-0003-3114-3517"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Ren","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101464181","display_name":"Ji Wang","orcid":"https://orcid.org/0000-0001-7077-3402"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ji Wang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,Changsha,China,410073","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5107809834"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05733551,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9254999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9241999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.84654301404953},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.80736243724823},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6651039123535156},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5108787417411804},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.442748486995697},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.361835241317749},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3572689890861511},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.22171664237976074},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13710027933120728}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.84654301404953},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.80736243724823},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6651039123535156},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5108787417411804},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.442748486995697},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.361835241317749},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3572689890861511},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.22171664237976074},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13710027933120728},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889297","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889297","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1933349210","https://openalex.org/W1956340063","https://openalex.org/W2560730294","https://openalex.org/W2962735233","https://openalex.org/W2979382951","https://openalex.org/W3035524453","https://openalex.org/W3153469116","https://openalex.org/W3156892778","https://openalex.org/W4385801138","https://openalex.org/W4391287688","https://openalex.org/W4393149022","https://openalex.org/W4393160204","https://openalex.org/W4402753774","https://openalex.org/W4405596328","https://openalex.org/W4406800520","https://openalex.org/W6739868092","https://openalex.org/W6759129252","https://openalex.org/W6761551260","https://openalex.org/W6774469542","https://openalex.org/W6784426856","https://openalex.org/W6847311340","https://openalex.org/W6851592950","https://openalex.org/W6853116092","https://openalex.org/W6853255756","https://openalex.org/W6855469472","https://openalex.org/W6855707979","https://openalex.org/W6855815363","https://openalex.org/W6856037140","https://openalex.org/W6856669996","https://openalex.org/W6857198620","https://openalex.org/W6859227138"],"related_works":["https://openalex.org/W2033512842","https://openalex.org/W4233600955","https://openalex.org/W4322734194","https://openalex.org/W3116237489","https://openalex.org/W4404996554","https://openalex.org/W2913665393","https://openalex.org/W2369695847","https://openalex.org/W3005535424","https://openalex.org/W2994319598","https://openalex.org/W2047067935"],"abstract_inverted_index":{"Large":[0],"Vision-Language":[1],"models":[2],"(LVLMs)":[3],"have":[4],"demonstrated":[5],"remarkable":[6],"performance":[7,55,97,116,148,166],"in":[8,43,167],"a":[9,63,75,105],"wide":[10],"range":[11],"of":[12,23,56,66,92,121,135],"vision-language":[13],"tasks":[14],"as":[15],"an":[16],"efficient":[17],"input/output":[18],"system.":[19],"However,":[20],"the":[21,27,31,36,48,53,60,89,114,119,132,160],"lack":[22],"adversarial":[24,90,107,156],"robustness":[25,91,157],"at":[26,35],"input":[28],"side":[29,38],"and":[30],"widespread":[32],"hallucination":[33],"phenomenon":[34],"output":[37,133],"significantly":[39],"undermine":[40],"user":[41],"trust":[42],"them.":[44],"Current":[45],"solutions":[46],"to":[47,51,87,112,128,137,159],"former":[49],"tend":[50],"sacrifice":[52],"general":[54,96,115,147],"LVLMs,":[57],"while":[58,99,154],"solving":[59],"latter":[61],"requires":[62],"large":[64],"amount":[65],"engineering":[67],"costs.":[68],"To":[69],"address":[70],"these":[71],"challenges,":[72],"we":[73,103],"propose":[74],"two-stage":[76],"optimization":[77],"strategy":[78],"called":[79],"RCDT":[80,143],"(Robust":[81],"CLIP-guided":[82],"Deep":[83],"Thinking),":[84],"which":[85],"aims":[86],"enhance":[88],"LVLMs":[93,136],"with":[94],"minimal":[95],"loss":[98,117,149],"reducing":[100],"hallucinations.":[101,139,169],"First,":[102],"introduce":[104],"constrained":[106],"fine-tuning":[108],"approach":[109],"for":[110],"CLIP":[111,125],"limit":[113],"during":[118],"enhancement":[120],"robustness.":[122],"Furthermore,":[123],"this":[124],"is":[126],"used":[127],"think":[129],"deeply":[130],"about":[131],"process":[134],"reduce":[138,146],"Experiments":[140],"show":[141],"that":[142],"not":[144],"only":[145],"by":[150],"more":[151],"than":[152],"half":[153],"maintaining":[155],"compared":[158],"baselines,":[161],"but":[162],"also":[163],"demonstrate":[164],"good":[165],"mitigating":[168]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
