{"id":"https://openalex.org/W4408810164","doi":"https://doi.org/10.1109/tip.2025.3551648","title":"Diffusion Model is Secretly a Training-Free Open Vocabulary Semantic Segmenter","display_name":"Diffusion Model is Secretly a Training-Free Open Vocabulary Semantic Segmenter","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4408810164","doi":"https://doi.org/10.1109/tip.2025.3551648","pmid":"https://pubmed.ncbi.nlm.nih.gov/40126966"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2025.3551648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3551648","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jinglong Wang","orcid":"https://orcid.org/0000-0001-7928-9035"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinglong Wang","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102833206","display_name":"Xiawei Li","orcid":"https://orcid.org/0000-0002-2709-7020"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiawei Li","raw_affiliation_strings":["Business Research and Development Department, Baidu, Beijing, China","Business RD Department, Baidu, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Business Research and Development Department, Baidu, Beijing, China","institution_ids":["https://openalex.org/I98301712"]},{"raw_affiliation_string":"Business RD Department, Baidu, Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052709846","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0003-3516-0111"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110705023","display_name":"Qingyuan Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyuan Xu","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101793645","display_name":"Qin Zhou","orcid":"https://orcid.org/0009-0007-9053-5381"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Zhou","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101600341","display_name":"Qian Yu","orcid":"https://orcid.org/0000-0002-0538-7940"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Yu","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035443556","display_name":"Lu Sheng","orcid":"https://orcid.org/0000-0002-8525-9163"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Sheng","raw_affiliation_strings":["School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082181536","display_name":"Dong Xu","orcid":"https://orcid.org/0000-0003-2775-9730"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Dong Xu","raw_affiliation_strings":["Department of Computer Science, The University of Hong Kong, Kowloon Tong, Hong Kong","School of Software, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708","https://openalex.org/I889458895"]},{"raw_affiliation_string":"School of Software, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":44.7799,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.99767425,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"34","issue":null,"first_page":"1895","last_page":"1907"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.62568199634552},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5566156506538391},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5530751347541809},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4382178783416748},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10901269316673279}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62568199634552},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5566156506538391},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5530751347541809},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4382178783416748},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10901269316673279},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2025.3551648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3551648","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:40126966","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40126966","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6899999976158142,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1121367935","display_name":null,"funder_award_id":"62461160331","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5512158772","display_name":null,"funder_award_id":"No.62132001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6078352744","display_name":null,"funder_award_id":"62132001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8547480902","display_name":null,"funder_award_id":"No.62461160331","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2031489346","https://openalex.org/W2125215748","https://openalex.org/W2161236525","https://openalex.org/W2225156818","https://openalex.org/W2962867364","https://openalex.org/W3034373787","https://openalex.org/W3034930876","https://openalex.org/W3177958285","https://openalex.org/W4226058394","https://openalex.org/W4312330522","https://openalex.org/W4312420092","https://openalex.org/W4312509967","https://openalex.org/W4312566218","https://openalex.org/W4312709262","https://openalex.org/W4312824283","https://openalex.org/W4312912313","https://openalex.org/W4312933868","https://openalex.org/W4312935996","https://openalex.org/W4312960937","https://openalex.org/W4312980231","https://openalex.org/W4313052647","https://openalex.org/W4385270985","https://openalex.org/W4385569875","https://openalex.org/W4386065432","https://openalex.org/W4386071798","https://openalex.org/W4386075561","https://openalex.org/W4386075766","https://openalex.org/W4386075819","https://openalex.org/W4390871935","https://openalex.org/W4390872297","https://openalex.org/W4390872636","https://openalex.org/W4390872669","https://openalex.org/W4390872876","https://openalex.org/W4390873045","https://openalex.org/W4390873195","https://openalex.org/W4390873426","https://openalex.org/W4390874575","https://openalex.org/W4401417679","https://openalex.org/W4402817070","https://openalex.org/W4402961794","https://openalex.org/W4403012098","https://openalex.org/W4404988814","https://openalex.org/W6779823529","https://openalex.org/W6791353385","https://openalex.org/W6802404399","https://openalex.org/W6804703708","https://openalex.org/W6805547681","https://openalex.org/W6811013733","https://openalex.org/W6839009181","https://openalex.org/W6841366371","https://openalex.org/W6846611385","https://openalex.org/W6846835116","https://openalex.org/W6847405979","https://openalex.org/W6850241730","https://openalex.org/W6853693339","https://openalex.org/W6854424048","https://openalex.org/W6860983728","https://openalex.org/W6874247451"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,112],"pre-trained":[1],"text-image":[2],"discriminative":[3],"models,":[4],"such":[5],"as":[6,98],"CLIP,":[7],"has":[8,33],"been":[9,34],"explored":[10],"for":[11,57,208],"open-vocabulary":[12,101,209],"semantic":[13,49,66,102,210],"segmentation":[14,174,192],"with":[15],"unsatisfactory":[16],"results":[17,207],"due":[18],"to":[19,48,64,116,124,188],"the":[20,40,88,125,129,134,146,152,156,161,166,170,191,202],"loss":[21],"of":[22,28,42,75,90,172],"crucial":[23],"localization":[24],"information":[25],"and":[26,104,133,183],"awareness":[27],"object":[29,131,147],"shapes.":[30],"Recently,":[31],"there":[32],"a":[35,72,106,184],"growing":[36],"interest":[37],"in":[38],"expanding":[39],"application":[41],"generative":[43,54,91],"models":[44,55,94],"from":[45],"generation":[46],"tasks":[47],"segmentation.":[50,67,211],"These":[51],"approaches":[52],"utilize":[53],"either":[56],"generating":[58,71],"annotated":[59],"data":[60,77],"or":[61,78],"extracting":[62],"features":[63],"facilitate":[65],"This":[68],"typically":[69],"involves":[70],"considerable":[73],"amount":[74],"synthetic":[76],"requiring":[79],"additional":[80],"mask":[81],"annotations.":[82],"To":[83],"this":[84],"end,":[85],"we":[86,177],"uncover":[87],"potential":[89],"text-to-image":[92],"diffusion":[93,141],"(e.g.,":[95],"Stable":[96],"Diffusion)":[97],"highly":[99],"efficient":[100],"segmenters,":[103],"introduce":[105],"novel":[107],"training-free":[108],"approach":[109],"named":[110],"DiffSegmenter.":[111],"insight":[113],"is":[114],"that":[115,120,145,201],"generate":[117],"realistic":[118],"objects":[119],"are":[121,137,149,158],"semantically":[122],"faithful":[123],"input":[126],"text,":[127],"both":[128],"complete":[130],"shapes":[132,148],"corresponding":[135],"semantics":[136,157],"implicitly":[138],"learned":[139],"by":[140,151,165],"models.":[142],"We":[143],"discover":[144],"characterized":[150],"self-attention":[153],"maps":[154,163],"while":[155],"indicated":[159],"through":[160],"cross-attention":[162],"produced":[164],"denoising":[167],"U-Net,":[168],"forming":[169],"basis":[171],"our":[173],"results.":[175,193],"Additionally,":[176],"carefully":[178],"design":[179],"effective":[180],"textual":[181],"prompts":[182],"category":[185],"filtering":[186],"mechanism":[187],"further":[189],"enhance":[190],"Extensive":[194],"experiments":[195],"on":[196],"three":[197],"benchmark":[198],"datasets":[199],"show":[200],"proposed":[203],"DiffSegmenter":[204],"achieves":[205],"impressive":[206]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":18}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
