{"id":"https://openalex.org/W4414447584","doi":"https://doi.org/10.1109/iccv51701.2025.02118","title":"Plug-in Feedback Self-Adaptive Attention in CLIP for Training-Free Open-Vocabulary Segmentation","display_name":"Plug-in Feedback Self-Adaptive Attention in CLIP for Training-Free Open-Vocabulary Segmentation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4414447584","doi":"https://doi.org/10.1109/iccv51701.2025.02118"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02118","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.20265","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064393186","display_name":"Zhixiang Chi","orcid":"https://orcid.org/0000-0003-4560-4986"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Zhixiang Chi","raw_affiliation_strings":["University of Toronto"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046147392","display_name":"Yanan Wu","orcid":"https://orcid.org/0000-0003-2291-1334"},"institutions":[{"id":"https://openalex.org/I52158045","display_name":"China Agricultural University","ror":"https://ror.org/04v3ywz14","country_code":"CN","type":"education","lineage":["https://openalex.org/I52158045"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanan Wu","raw_affiliation_strings":["China Agricultural University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Agricultural University","institution_ids":["https://openalex.org/I52158045"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109543250","display_name":"Li Gu","orcid":"https://orcid.org/0000-0002-0265-0491"},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Li Gu","raw_affiliation_strings":["Concordia University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Concordia University","institution_ids":["https://openalex.org/I60158472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103119714","display_name":"Huan Liu","orcid":"https://orcid.org/0009-0000-6347-5060"},"institutions":[{"id":"https://openalex.org/I98251732","display_name":"McMaster University","ror":"https://ror.org/02fa3aq29","country_code":"CA","type":"education","lineage":["https://openalex.org/I98251732"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Huan Liu","raw_affiliation_strings":["McMaster University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"McMaster University","institution_ids":["https://openalex.org/I98251732"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100400469","display_name":"Ziqiang Wang","orcid":"https://orcid.org/0000-0001-5633-1608"},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ziqiang Wang","raw_affiliation_strings":["Concordia University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Concordia University","institution_ids":["https://openalex.org/I60158472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354571","display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0001-6821-2710"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["Beijing Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100322712","display_name":"Yan Wang","orcid":"https://orcid.org/0000-0002-5344-1884"},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["Concordia University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Concordia University","institution_ids":["https://openalex.org/I60158472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059152392","display_name":"Konstantinos N. Plataniotis","orcid":"https://orcid.org/0000-0003-3647-5473"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Konstantinos N Plataniotis","raw_affiliation_strings":["University of Toronto"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5064393186"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12719539,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"22815","last_page":"22825"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9373000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6777999997138977},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.6596999764442444},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5483999848365784},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.40380001068115234},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.36340001225471497},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35920000076293945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7771000266075134},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6777999997138977},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.6596999764442444},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5910999774932861},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40799999237060547},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.36340001225471497},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35920000076293945},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C2985909886","wikidata":"https://www.wikidata.org/wiki/Q193147","display_name":"Spatial coherence","level":3,"score":0.3418000042438507},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.25450000166893005}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02118","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2508.20265","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.20265","pdf_url":"https://arxiv.org/pdf/2508.20265","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.20265","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.20265","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.20265","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.20265","pdf_url":"https://arxiv.org/pdf/2508.20265","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"CLIP":[0],"exhibits":[1],"strong":[2],"visual-textual":[3],"alignment":[4],"but":[5],"struggle":[6],"with":[7,46,156,176],"open-vocabulary":[8],"segmentation":[9],"due":[10,33],"to":[11,29,34,74,136],"poor":[12],"localization.":[13],"Prior":[14],"methods":[15],"enhance":[16],"spatial":[17,118],"coherence":[18,25,119,141],"by":[19,110],"modifying":[20],"intermediate":[21,41,76],"attention.":[22,77],"But,":[23],"this":[24,59],"isn't":[26],"consistently":[27,183],"propagated":[28],"the":[30,53,75,82,85,89,112,139],"final":[31,108],"output":[32,79,140],"subsequent":[35],"operations":[36],"such":[37,49],"as":[38,115,146],"projections.":[39],"Additionally,":[40],"attention":[42,126,169],"lacks":[43],"direct":[44],"interaction":[45],"text":[47],"representations,":[48],"semantic":[50,102],"discrepancy":[51],"limits":[52],"full":[54],"potential":[55],"of":[56,84],"CLIP.":[57],"In":[58],"work,":[60],"we":[61],"propose":[62],"a":[63,116,147],"training-free,":[64],"feedback-driven":[65],"self-adaptive":[66],"framework":[67,166],"that":[68],"adapts":[69],"output-based":[70],"patch-level":[71],"correspondences":[72],"back":[73],"The":[78],"predictions,":[80],"being":[81],"culmination":[83],"model's":[86,113],"processing,":[87],"encapsulate":[88],"most":[90],"comprehensive":[91],"visual":[92],"and":[93,107,133,173,179],"textual":[94],"semantics":[95],"about":[96],"each":[97],"patch.":[98],"Our":[99,143,181],"approach":[100,182],"enhances":[101],"consistency":[103],"between":[104],"internal":[105],"representations":[106],"predictions":[109],"leveraging":[111],"outputs":[114],"stronger":[117],"prior.":[120],"We":[121,162],"design":[122],"key":[123],"modules,":[124],"including":[125],"isolation,":[127],"confidence-based":[128],"pruning":[129],"for":[130],"sparse":[131],"adaptation,":[132],"adaptation":[134],"ensemble,":[135],"effectively":[137],"feedback":[138],"cues.":[142],"method":[144],"functions":[145],"plug-in":[148],"module,":[149],"seamlessly":[150],"integrating":[151],"into":[152],"four":[153],"state-of-the-art":[154],"approaches":[155],"three":[157],"backbones":[158],"(ViT-B,":[159],"ViT-L,":[160],"ViT-H).":[161],"further":[163],"validate":[164],"our":[165],"across":[167,187],"multiple":[168],"types":[170],"(Q-K,":[171],"self-self,":[172],"Proxy":[174],"augmented":[175],"MAE,":[177],"SAM,":[178],"DINO).":[180],"improves":[184],"their":[185],"performance":[186],"eight":[188],"benchmarks.":[189]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
