{"id":"https://openalex.org/W4385764483","doi":"https://doi.org/10.24963/ijcai.2023/144","title":"SLViT: Scale-Wise Language-Guided Vision Transformer for Referring Image Segmentation","display_name":"SLViT: Scale-Wise Language-Guided Vision Transformer for Referring Image Segmentation","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385764483","doi":"https://doi.org/10.24963/ijcai.2023/144"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/144","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/144","pdf_url":"https://www.ijcai.org/proceedings/2023/0144.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0144.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045662308","display_name":"Shuyi Ouyang","orcid":"https://orcid.org/0000-0003-4507-4153"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuyi Ouyang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100701032","display_name":"Hongyi Wang","orcid":"https://orcid.org/0000-0002-1076-1140"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyi Wang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066963650","display_name":"Shiao Xie","orcid":"https://orcid.org/0000-0002-5106-5512"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiao Xie","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034985742","display_name":"Ziwei Niu","orcid":"https://orcid.org/0000-0003-0171-5158"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziwei Niu","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057181928","display_name":"Ruofeng Tong","orcid":"https://orcid.org/0000-0002-8167-5354"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruofeng Tong","raw_affiliation_strings":["Zhejiang Lab","Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044216245","display_name":"Yen\u2010Wei Chen","orcid":"https://orcid.org/0000-0002-5952-0188"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yen-Wei Chen","raw_affiliation_strings":["Ritsumeikan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ritsumeikan University","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090814258","display_name":"Lanfen Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lanfen Lin","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5044216245"],"corresponding_institution_ids":["https://openalex.org/I135768898"],"apc_list":null,"apc_paid":null,"fwci":2.422,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.9094014,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1294","last_page":"1302"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7689259052276611},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6255625486373901},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6111229658126831},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.568367600440979},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5669621229171753},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.48395803570747375},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.45751848816871643},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.42103028297424316},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41027557849884033},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36652353405952454},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3628803491592407}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7689259052276611},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6255625486373901},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6111229658126831},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.568367600440979},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5669621229171753},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.48395803570747375},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.45751848816871643},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42103028297424316},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41027557849884033},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36652353405952454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3628803491592407},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/144","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/144","pdf_url":"https://www.ijcai.org/proceedings/2023/0144.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/144","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/144","pdf_url":"https://www.ijcai.org/proceedings/2023/0144.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5899999737739563,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G5268433318","display_name":null,"funder_award_id":"2020ND8AD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5417803145","display_name":null,"funder_award_id":"LZ22F020012","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385764483.pdf"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W2144960104","https://openalex.org/W2302548814","https://openalex.org/W2489434015","https://openalex.org/W2505639562","https://openalex.org/W2605127024","https://openalex.org/W2784458614","https://openalex.org/W2798556392","https://openalex.org/W2894964039","https://openalex.org/W2896457183","https://openalex.org/W2936707910","https://openalex.org/W2963109634","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2979826702","https://openalex.org/W2980088508","https://openalex.org/W3004019157","https://openalex.org/W3020827971","https://openalex.org/W3034325957","https://openalex.org/W3034692043","https://openalex.org/W3035097537","https://openalex.org/W3089797362","https://openalex.org/W3093025045","https://openalex.org/W3094502228","https://openalex.org/W3105906614","https://openalex.org/W3108748824","https://openalex.org/W3122818000","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3158763510","https://openalex.org/W3169150690","https://openalex.org/W3169998662","https://openalex.org/W3172522282","https://openalex.org/W3178075329","https://openalex.org/W3187664142","https://openalex.org/W3191278083","https://openalex.org/W3201770677","https://openalex.org/W3216314363","https://openalex.org/W3216551675","https://openalex.org/W4200631575","https://openalex.org/W4224988000","https://openalex.org/W4226365997","https://openalex.org/W4287324101","https://openalex.org/W4296544717","https://openalex.org/W4312543911","https://openalex.org/W4313160444","https://openalex.org/W4322615526","https://openalex.org/W4327534052","https://openalex.org/W6790825729","https://openalex.org/W6803771590","https://openalex.org/W6863994431","https://openalex.org/W6864014924","https://openalex.org/W6868564194"],"related_works":["https://openalex.org/W2787993192","https://openalex.org/W2158269427","https://openalex.org/W4381280689","https://openalex.org/W2847365777","https://openalex.org/W4379231730","https://openalex.org/W3128025644","https://openalex.org/W4389858081","https://openalex.org/W2355048207","https://openalex.org/W2750422482","https://openalex.org/W1522196789"],"abstract_inverted_index":{"Referring":[0],"image":[1,11],"segmentation":[2],"aims":[3],"to":[4,25,44],"segment":[5],"an":[6,10,124],"object":[7,28],"out":[8],"of":[9,34,138],"via":[12,147],"a":[13,92,107],"specific":[14],"language":[15],"expression.":[16],"The":[17,160,173],"main":[18],"concept":[19],"is":[20,175],"establishing":[21],"global":[22,120],"visual-linguistic":[23,69,121],"relationships":[24,122],"locate":[26],"the":[27,35],"and":[29,62,80,118,144],"identify":[30,136],"boundaries":[31],"using":[32,141],"details":[33],"image.":[36],"Recently,":[37],"various":[38],"Transformer-based":[39],"techniques":[40],"have":[41,152],"been":[42],"proposed":[43],"efficiently":[45],"leverage":[46],"long-range":[47],"cross-modal":[48,63],"dependencies,":[49],"enhancing":[50],"performance":[51],"for":[52,112],"referring":[53],"segmentation.":[54],"However,":[55],"existing":[56],"methods":[57,168],"consider":[58],"visual":[59,116],"feature":[60],"extraction":[61],"fusion":[64],"separately,":[65],"resulting":[66],"in":[67,71,123],"insufficient":[68],"alignment":[70],"semantic":[72],"space.":[73],"In":[74],"addition,":[75],"they":[76],"employ":[77],"sequential":[78],"structures":[79],"hence":[81],"lack":[82],"multi-scale":[83,149],"information":[84,117],"interaction.":[85],"To":[86],"address":[87],"these":[88],"limitations,":[89],"we":[90],"propose":[91],"Scale-Wise":[93],"Language-Guided":[94,103],"Vision":[95],"Transformer":[96],"(SLViT)":[97],"with":[98,169],"two":[99],"appealing":[100],"designs:":[101],"(1)":[102],"Multi-Scale":[104],"Fusion":[105],"Attention,":[106],"novel":[108],"attention":[109],"mechanism":[110],"module":[111,133],"extracting":[113],"rich":[114],"local":[115],"modeling":[119],"integrated":[125],"manner.":[126],"(2)":[127],"An":[128],"Uncertain":[129],"Region":[130],"Cross-Scale":[131],"Enhancement":[132],"that":[134,164],"can":[135],"regions":[137],"high":[139],"uncertainty":[140],"linguistic":[142],"features":[143],"refine":[145],"them":[146],"aggregated":[148],"features.":[150],"We":[151],"evaluated":[153],"our":[154],"method":[155],"on":[156],"three":[157],"benchmark":[158],"datasets.":[159],"experimental":[161],"results":[162],"demonstrate":[163],"SLViT":[165],"surpasses":[166],"state-of-the-art":[167],"lower":[170],"computational":[171],"cost.":[172],"code":[174],"publicly":[176],"available":[177],"at:":[178],"https://github.com/NaturalKnight/SLViT.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":8}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
