{"id":"https://openalex.org/W4403791683","doi":"https://doi.org/10.1145/3664647.3680730","title":"Chain of Visual Perception: Harnessing Multimodal Large Language Models for Zero-shot Camouflaged Object Detection","display_name":"Chain of Visual Perception: Harnessing Multimodal Large Language Models for Zero-shot Camouflaged Object Detection","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791683","doi":"https://doi.org/10.1145/3664647.3680730"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101498637","display_name":"Lv Tang","orcid":"https://orcid.org/0000-0001-7359-1057"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lv Tang","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd, Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062345781","display_name":"Peng-Tao Jiang","orcid":"https://orcid.org/0000-0002-1786-4943"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng-Tao Jiang","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd, Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088085088","display_name":"Zhihui Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi-Hao Shen","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd, Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100397087","display_name":"Hao Zhang","orcid":"https://orcid.org/0009-0007-1175-5918"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhang","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd, Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108793115","display_name":"Jinwei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin-Wei Chen","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd., Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100374366","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-7817-0665"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd., Shanghai, China","institution_ids":["https://openalex.org/I180662265"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101498637"],"corresponding_institution_ids":["https://openalex.org/I180662265"],"apc_list":null,"apc_paid":null,"fwci":5.5263,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.9695324,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"8805","last_page":"8814"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6539437770843506},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6486767530441284},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6171125173568726},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5927782654762268},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5876429677009583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5537437796592712},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5200130939483643},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5030059218406677},{"id":"https://openalex.org/keywords/chain","display_name":"Chain (unit)","score":0.4565775394439697},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3407345116138458},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2708479166030884},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.15101531147956848},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1338556408882141},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13029775023460388}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6539437770843506},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6486767530441284},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6171125173568726},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5927782654762268},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5876429677009583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5537437796592712},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5200130939483643},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5030059218406677},{"id":"https://openalex.org/C199185054","wikidata":"https://www.wikidata.org/wiki/Q552299","display_name":"Chain (unit)","level":2,"score":0.4565775394439697},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3407345116138458},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2708479166030884},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.15101531147956848},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1338556408882141},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13029775023460388},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2943545929","https://openalex.org/W2963868681","https://openalex.org/W2982110796","https://openalex.org/W3034684132","https://openalex.org/W3094815392","https://openalex.org/W3122006940","https://openalex.org/W3164098653","https://openalex.org/W3203092180","https://openalex.org/W3203700770","https://openalex.org/W3209871728","https://openalex.org/W3210073375","https://openalex.org/W3213922487","https://openalex.org/W4221151441","https://openalex.org/W4281398151","https://openalex.org/W4281858881","https://openalex.org/W4287237767","https://openalex.org/W4287241509","https://openalex.org/W4312258849","https://openalex.org/W4313023779","https://openalex.org/W4315490105","https://openalex.org/W4367000547","https://openalex.org/W4382450131","https://openalex.org/W4386075673","https://openalex.org/W4386265875","https://openalex.org/W4390874575","https://openalex.org/W4393159738","https://openalex.org/W6788627440","https://openalex.org/W6855891829"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"In":[0],"this":[1,89,175],"paper,":[2],"we":[3,91],"introduce":[4],"a":[5,58,94],"novel":[6],"multimodal":[7],"camo-perceptive":[8],"framework":[9],"(MMCPF)":[10],"aimed":[11],"at":[12],"handling":[13],"zero-shot":[14,59,152],"Camouflaged":[15],"Object":[16],"Detection":[17],"(COD)":[18],"by":[19,115],"leveraging":[20,116],"the":[21,31,98,107,126,167],"powerful":[22],"capabilities":[23,109],"of":[24,34,84,100,110,128,169,174],"Multimodal":[25],"Large":[26],"Language":[27],"Models":[28],"(MLLMs).":[29],"Recognizing":[30],"inherent":[32],"limitations":[33],"current":[35],"COD":[36,76,134,153],"methodologies,":[37],"which":[38,104,165],"predominantly":[39],"rely":[40],"on":[41,130],"supervised":[42],"learning":[43],"models":[44],"demanding":[45],"extensive":[46],"and":[47,79,119,141,155,162],"accurately":[48],"annotated":[49],"datasets,":[50,135],"resulting":[51],"in":[52,75,112],"weak":[53],"generalization,":[54],"our":[55],"research":[56],"proposes":[57],"MMCPF":[60,129,146],"that":[61,145],"circumvents":[62],"these":[63],"challenges.":[64],"Although":[65],"MLLMs":[66,111],"hold":[67],"significant":[68],"potential":[69,168],"for":[70],"broad":[71],"applications,":[72],"their":[73],"effectiveness":[74,127],"is":[77,177],"hindered":[78],"they":[80],"would":[81],"make":[82],"misinterpretations":[83],"camouflaged":[85,113],"objects.":[86],"To":[87],"address":[88],"challenge,":[90],"further":[92],"propose":[93],"strategic":[95],"enhancement":[96],"called":[97],"Chain":[99],"Visual":[101],"Perception":[102],"(CoVP),":[103],"significantly":[105],"improves":[106],"perceptual":[108],"scenes":[114],"both":[117],"linguistic":[118],"visual":[120],"cues":[121],"more":[122],"effectively.":[123],"We":[124],"validate":[125],"five":[131],"widely":[132],"used":[133],"containing":[136],"CAMO,":[137],"COD10K,":[138],"NC4K,":[139],"MoCA-Mask":[140],"OVCamo.":[142],"Experiments":[143],"show":[144],"can":[147],"outperform":[148],"all":[149],"existing":[150],"state-of-the-art":[151],"methods,":[154,164],"achieve":[156],"competitive":[157],"performance":[158],"compared":[159],"to":[160],"weakly-supervised":[161],"fully-supervised":[163],"demonstrates":[166],"MMCPF.":[170],"The":[171],"Github":[172],"link":[173],"paper":[176],"https://github.com/luckybird1994/MMCPF.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
