{"id":"https://openalex.org/W4415539661","doi":"https://doi.org/10.1145/3746027.3755175","title":"Stepwise Decomposition and Dual-stream Focus: A Novel Approach for Training-free Camouflaged Object Segmentation","display_name":"Stepwise Decomposition and Dual-stream Focus: A Novel Approach for Training-free Camouflaged Object Segmentation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415539661","doi":"https://doi.org/10.1145/3746027.3755175"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755175","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755175","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104137497","display_name":"Chao Yin","orcid":"https://orcid.org/0009-0005-9565-1571"},"institutions":[{"id":"https://openalex.org/I113940042","display_name":"Shanghai University","ror":"https://ror.org/006teas31","country_code":"CN","type":"education","lineage":["https://openalex.org/I113940042"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Yin","raw_affiliation_strings":["Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-9565-1571","affiliations":[{"raw_affiliation_string":"Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I113940042"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Li","orcid":"https://orcid.org/0000-0002-8827-8351"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Li","raw_affiliation_strings":["University of the Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8827-8351","affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083388855","display_name":"Kequan Yang","orcid":"https://orcid.org/0000-0002-5084-3474"},"institutions":[{"id":"https://openalex.org/I113940042","display_name":"Shanghai University","ror":"https://ror.org/006teas31","country_code":"CN","type":"education","lineage":["https://openalex.org/I113940042"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kequan Yang","raw_affiliation_strings":["Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-5084-3474","affiliations":[{"raw_affiliation_string":"Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I113940042"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046247173","display_name":"Jide Li","orcid":"https://orcid.org/0000-0002-0754-5842"},"institutions":[{"id":"https://openalex.org/I113940042","display_name":"Shanghai University","ror":"https://ror.org/006teas31","country_code":"CN","type":"education","lineage":["https://openalex.org/I113940042"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jide Li","raw_affiliation_strings":["Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-0754-5842","affiliations":[{"raw_affiliation_string":"Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I113940042"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100513239","display_name":"Pinpin Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I113940042","display_name":"Shanghai University","ror":"https://ror.org/006teas31","country_code":"CN","type":"education","lineage":["https://openalex.org/I113940042"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pinpin Zhu","raw_affiliation_strings":["Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-9781-5429","affiliations":[{"raw_affiliation_string":"Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I113940042"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100339621","display_name":"Xiaoqiang Li","orcid":"https://orcid.org/0000-0001-7243-2783"},"institutions":[{"id":"https://openalex.org/I113940042","display_name":"Shanghai University","ror":"https://ror.org/006teas31","country_code":"CN","type":"education","lineage":["https://openalex.org/I113940042"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoqiang Li","raw_affiliation_strings":["Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-7243-2783","affiliations":[{"raw_affiliation_string":"Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I113940042"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5104137497"],"corresponding_institution_ids":["https://openalex.org/I113940042"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28845512,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3741","last_page":"3750"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.8226000070571899},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6812000274658203},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6640999913215637},{"id":"https://openalex.org/keywords/scale-space-segmentation","display_name":"Scale-space segmentation","score":0.5008999705314636},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.49790000915527344},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.4577000141143799},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4512999951839447},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4503999948501587},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.43290001153945923}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.8226000070571899},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7767999768257141},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7429999709129333},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6812000274658203},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6640999913215637},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.607699990272522},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.5008999705314636},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.49790000915527344},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.4577000141143799},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4512999951839447},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4503999948501587},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.42160001397132874},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3977000117301941},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.3249000012874603},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.28209999203681946},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.2635999917984009},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755175","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755175","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2943545929","https://openalex.org/W3177087374","https://openalex.org/W4315490105","https://openalex.org/W4382465636","https://openalex.org/W4388574886","https://openalex.org/W4391465821","https://openalex.org/W4393159738","https://openalex.org/W4393159912","https://openalex.org/W4393161084","https://openalex.org/W4400975057","https://openalex.org/W4401564450","https://openalex.org/W4402159347","https://openalex.org/W4402494392","https://openalex.org/W4405316450","https://openalex.org/W4405364185","https://openalex.org/W4405519151","https://openalex.org/W4406051699","https://openalex.org/W4407056550","https://openalex.org/W4409209673"],"related_works":[],"abstract_inverted_index":{"While":[0],"promptable":[1,27],"segmentation":[2,10,28,46,192],"(e.g.,":[3],"SAM)":[4],"has":[5],"shown":[6],"promise":[7],"for":[8,18,34,170],"various":[9],"tasks,":[11],"it":[12],"still":[13,61],"requires":[14],"manual":[15],"visual":[16,97,163,168],"prompts":[17,37,169],"each":[19],"object":[20,107],"to":[21,30,44,54,84,115,153],"be":[22,201],"segmented.":[23],"In":[24],"contrast,":[25],"task-generic":[26,42],"aims":[29],"reduce":[31],"the":[32,121],"need":[33],"such":[35],"detailed":[36],"by":[38],"employing":[39],"only":[40],"a":[41,127,190],"prompt":[43],"guide":[45],"across":[47],"all":[48],"test":[49],"samples.":[50],"However,":[51],"when":[52],"applied":[53],"Camouflaged":[55],"Object":[56],"Segmentation":[57],"(COS),":[58],"current":[59],"methods":[60],"face":[62],"two":[63],"critical":[64],"issues:":[65],"1)":[66],"semantic":[67,88,155,177],"ambiguity":[68],"in":[69,80,94],"getting":[70,95],"instance-specific":[71,96],"text":[72],"prompts,":[73,98],"which":[74,99],"arises":[75],"from":[76,101,106],"insufficient":[77],"discriminative":[78],"cues":[79],"holistic":[81],"captions,":[82],"leading":[83],"foreground-background":[85],"confusion;":[86],"2)":[87],"discrepancy":[89,178],"combined":[90],"with":[91,109],"spatial":[92,160,180],"separation":[93],"results":[100],"global":[102],"background":[103,173],"sampling":[104],"far":[105],"boundaries":[108],"low":[110],"feature":[111],"correlation,":[112],"causing":[113],"SAM":[114],"segment":[116],"irrelevant":[117],"regions.":[118],"To":[119],"mitigate":[120],"issues":[122],"above,":[123],"we":[124],"propose":[125],"RDVP-MSD,":[126],"novel":[128],"training-free":[129],"test-time":[130],"adaptation":[131],"framework":[132],"that":[133],"synergizes":[134],"Region-constrained":[135],"Dual-stream":[136],"Visual":[137],"Prompting":[138],"(RDVP)":[139],"via":[140],"Multimodal":[141],"Stepwise":[142],"Decomposition":[143],"Chain":[144],"of":[145],"Thought":[146],"(MSD-CoT).":[147],"MSD-CoT":[148],"progressively":[149],"disentangles":[150],"image":[151],"captions":[152],"eliminate":[154],"ambiguity,":[156],"while":[157],"RDVP":[158],"injects":[159],"constraints":[161],"into":[162],"prompting":[164],"and":[165,172,179],"independently":[166],"samples":[167],"foreground":[171],"points,":[174],"effectively":[175],"mitigating":[176],"separation.":[181],"Without":[182],"requiring":[183],"any":[184],"training":[185],"or":[186],"supervision,":[187],"RDVP-MSD":[188],"achieves":[189],"state-of-the-art":[191],"result":[193],"on":[194],"multiple":[195],"COS":[196],"benchmarks.":[197],"The":[198],"codes":[199],"will":[200],"available":[202],"at":[203],"https://github.com/ycyinchao/RDVP-MSD.":[204]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-25T00:00:00"}
