{"id":"https://openalex.org/W7125945975","doi":"https://doi.org/10.1109/smc58881.2025.11343674","title":"Semantic Token Enhancement and Clustering-Guided Activation for Weakly Supervised Semantic Segmentation","display_name":"Semantic Token Enhancement and Clustering-Guided Activation for Weakly Supervised Semantic Segmentation","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125945975","doi":"https://doi.org/10.1109/smc58881.2025.11343674"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11343674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123405533","display_name":"Jingjing Hou","orcid":null},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingjing Hou","raw_affiliation_strings":["Chongqing University,College of Computer Science,China,401331"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,China,401331","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124069296","display_name":"Yuheng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuheng Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5012276947","display_name":"T. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Taiping Zhang","raw_affiliation_strings":["Chongqing University,College of Computer Science,China,401331"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,China,401331","institution_ids":["https://openalex.org/I158842170"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5123405533"],"corresponding_institution_ids":["https://openalex.org/I158842170"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71876714,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2697","last_page":"2702"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8173999786376953,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8173999786376953,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.04600000008940697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.022199999541044235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6978999972343445},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6272000074386597},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49219998717308044},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4884999990463257},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.484499990940094},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4636000096797943},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4569000005722046}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7592999935150146},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6978999972343445},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6272000074386597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5848000049591064},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49219998717308044},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4884999990463257},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4636000096797943},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4569000005722046},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4309000074863434},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4043000042438507},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3935000002384186},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3465999960899353},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30140000581741333},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2800999879837036}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11343674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2962758679","https://openalex.org/W3132851869","https://openalex.org/W3142837074","https://openalex.org/W3164772195","https://openalex.org/W3175456851","https://openalex.org/W3177958285","https://openalex.org/W3183732083","https://openalex.org/W3202524699","https://openalex.org/W3203879378","https://openalex.org/W4200097795","https://openalex.org/W4200634368","https://openalex.org/W4220950530","https://openalex.org/W4226158211","https://openalex.org/W4312509967","https://openalex.org/W4312836939","https://openalex.org/W4313052647","https://openalex.org/W4313139852","https://openalex.org/W4386066092","https://openalex.org/W4386071966","https://openalex.org/W4386113282","https://openalex.org/W4390603931","https://openalex.org/W4393147872","https://openalex.org/W4398757498","https://openalex.org/W4402727630"],"related_works":[],"abstract_inverted_index":{"Weakly-Supervised":[0],"Semantic":[1],"Segmentation":[2],"(WSSS)":[3],"methods":[4,42,150],"based":[5],"on":[6,65,67,166],"image-level":[7],"annotations":[8],"often":[9],"employ":[10],"Class":[11],"Activation":[12],"Maps":[13],"(CAM)":[14],"to":[15,31,125,145],"construct":[16],"pseudo":[17,89],"labels":[18],"for":[19],"dense":[20],"prediction.":[21],"Recently,":[22],"Transformer-Based":[23],"WSSS":[24,177],"approaches":[25],"have":[26],"attracted":[27],"increasing":[28],"attention":[29,50,77,98,128,131],"due":[30],"the":[32,59,86,126,142],"strong":[33],"capability":[34],"of":[35,72,88],"Transformer":[36,63],"in":[37,62,75,85],"capturing":[38],"global":[39],"context.":[40],"These":[41],"typically":[43],"generate":[44,152],"object":[45,113,144],"localization":[46],"maps":[47,78],"by":[48,121],"modeling":[49],"interactions":[51],"between":[52],"class":[53,155],"tokens":[54,140],"and":[55,79,104],"patch":[56,139],"tokens.":[57],"However,":[58],"self-attention":[60],"mechanism":[61],"tends":[64],"focus":[66],"only":[68],"a":[69,133,153],"limited":[70],"number":[71],"tokens,":[73],"resulting":[74],"sparse":[76],"consequently":[80],"overlooking":[81],"semantically":[82],"relevant":[83],"regions":[84],"generation":[87],"labels.":[90],"In":[91],"this":[92],"paper,":[93],"we":[94],"propose":[95],"two":[96],"novel":[97],"mechanisms:":[99],"SCAR":[100,116],"(selective":[101],"cross-attention":[102,123],"refinement)":[103],"Self-Squared":[105,130],"attention,":[106],"both":[107],"aimed":[108],"at":[109],"uniformly":[110],"highlighting":[111],"entire":[112],"regions.":[114,129],"The":[115,148],"module":[117,135],"extracts":[118],"localized":[119],"features":[120],"restricting":[122],"operations":[124],"predicted":[127],"includes":[132],"clustering-aware":[134],"that":[136,171],"groups":[137],"similar":[138],"from":[141],"same":[143],"guide":[146],"activation.":[147],"proposed":[149],"ultimately":[151],"clustering-guided":[154],"activation":[156],"map,":[157],"which":[158],"captures":[159],"more":[160],"comprehensive":[161],"region":[162],"coverage.":[163],"Experimental":[164],"results":[165],"standard":[167],"benchmark":[168],"datasets":[169],"demonstrate":[170],"our":[172],"approach":[173],"consistently":[174],"surpasses":[175],"previous":[176],"approaches.":[178]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
