{"id":"https://openalex.org/W4409129306","doi":"https://doi.org/10.1109/tmm.2025.3557719","title":"Towards Open-Vocabulary Video Semantic Segmentation","display_name":"Towards Open-Vocabulary Video Semantic Segmentation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4409129306","doi":"https://doi.org/10.1109/tmm.2025.3557719"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3557719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557719","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100611718","display_name":"Xinhao Li","orcid":"https://orcid.org/0000-0003-4566-5121"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinhao Li","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yun Liu","orcid":"https://orcid.org/0000-0001-6143-0264"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Liu","raw_affiliation_strings":["College of Computer Science, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010215983","display_name":"Guolei Sun","orcid":"https://orcid.org/0000-0001-8667-9656"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]},{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH","CN"],"is_corresponding":false,"raw_author_name":"Guolei Sun","raw_affiliation_strings":["Computer Vision Lab, ETH Zurich, Zurich, Switzerland","College of Computer Science, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"College of Computer Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340964","display_name":"Min Wu","orcid":"https://orcid.org/0000-0003-0977-3600"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Min Wu","raw_affiliation_strings":["Institute for Infocomm Research (I2R), Agency for Science, Technology, and Research (A*STAR), Singapore","Institute for Infocomm Research (I2R), Agency for Science, Technology, and Research (A&#x002A;STAR), Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), Agency for Science, Technology, and Research (A*STAR), Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute for Infocomm Research (I2R), Agency for Science, Technology, and Research (A&#x002A;STAR), Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115596286","display_name":"Le Zhang","orcid":"https://orcid.org/0000-0002-6930-8674"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Le Zhang","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034427070","display_name":"Ce Zhu","orcid":"https://orcid.org/0000-0001-7607-707X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ce Zhu","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100611718"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":1.2181,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77463158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"27","issue":null,"first_page":"2924","last_page":"2934"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.960099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8335649967193604},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5749045610427856},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5498006343841553},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4191635251045227},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39724496006965637},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.379991352558136},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.36275315284729004},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3257092833518982},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1037677526473999}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8335649967193604},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5749045610427856},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5498006343841553},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4191635251045227},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39724496006965637},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.379991352558136},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.36275315284729004},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3257092833518982},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1037677526473999},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3557719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557719","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G5962431186","display_name":null,"funder_award_id":"62020106011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W125693051","https://openalex.org/W1913356549","https://openalex.org/W2194775991","https://openalex.org/W2340897893","https://openalex.org/W2461677039","https://openalex.org/W2552900565","https://openalex.org/W2557465155","https://openalex.org/W2557993245","https://openalex.org/W2561585794","https://openalex.org/W2738569017","https://openalex.org/W2962815021","https://openalex.org/W2963019093","https://openalex.org/W2963445119","https://openalex.org/W2963866581","https://openalex.org/W2963917006","https://openalex.org/W2964076878","https://openalex.org/W2964254867","https://openalex.org/W2966634466","https://openalex.org/W2990230185","https://openalex.org/W3035531015","https://openalex.org/W3042609801","https://openalex.org/W3108812043","https://openalex.org/W3120875261","https://openalex.org/W3148848505","https://openalex.org/W3159637683","https://openalex.org/W3169367294","https://openalex.org/W3195135988","https://openalex.org/W3197715576","https://openalex.org/W3198020043","https://openalex.org/W3204976424","https://openalex.org/W3209325215","https://openalex.org/W4226106508","https://openalex.org/W4293680532","https://openalex.org/W4312373367","https://openalex.org/W4312458986","https://openalex.org/W4312598093","https://openalex.org/W4312909532","https://openalex.org/W4312912313","https://openalex.org/W4312919100","https://openalex.org/W4312935996","https://openalex.org/W4385245566","https://openalex.org/W4386065874","https://openalex.org/W4386076397","https://openalex.org/W4390659289","https://openalex.org/W4390872619","https://openalex.org/W4390872856","https://openalex.org/W4390872930","https://openalex.org/W4390874575","https://openalex.org/W4390874578","https://openalex.org/W4392693659","https://openalex.org/W4394699010","https://openalex.org/W4396214921","https://openalex.org/W4399039651","https://openalex.org/W4402715795","https://openalex.org/W4409369752","https://openalex.org/W6631190155","https://openalex.org/W6784333009","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6857842971"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W3167549738","https://openalex.org/W2381983017"],"abstract_inverted_index":{"Semantic":[0,31],"segmentation":[1,152],"in":[2,138,150],"videos":[3],"has":[4],"been":[5],"a":[6,42,62,68,85],"focal":[7],"point":[8],"of":[9,45,94],"recent":[10],"research.":[11],"However,":[12],"existing":[13],"models":[14],"encounter":[15],"challenges":[16],"when":[17],"faced":[18],"with":[19],"unfamiliar":[20],"categories.":[21,141],"To":[22,56],"address":[23],"this,":[24],"we":[25,60,83],"introduce":[26],"the":[27,73,91,98,111,119],"Open":[28],"Vocabulary":[29],"Video":[30],"Segmentation":[32],"(OV-VSS)":[33],"task,":[34],"designed":[35],"to":[36,75,114],"accurately":[37],"segment":[38],"every":[39],"pixel":[40],"across":[41,79,154],"wide":[43],"range":[44],"open-vocabulary":[46],"categories,":[47],"including":[48],"those":[49],"that":[50],"are":[51],"novel":[52,140],"or":[53],"previously":[54],"unexplored.":[55],"enhance":[57],"OV-VSS":[58],"performance,":[59],"propose":[61],"robust":[63],"baseline,":[64],"OV2VSS,":[65],"which":[66,109],"integrates":[67],"spatial-temporal":[69],"fusion":[70],"module,":[71,89],"allowing":[72],"model":[74],"utilize":[76],"temporal":[77],"relationships":[78],"consecutive":[80],"frames.":[81],"Additionally,":[82],"incorporate":[84],"random":[86],"frame":[87],"enhancement":[88],"broadening":[90],"model's":[92,112],"understanding":[93],"semantic":[95,151],"context":[96],"throughout":[97],"entire":[99],"video":[100,106,120,156],"sequence.":[101],"Our":[102],"approach":[103],"also":[104],"includes":[105],"text":[107],"encoding,":[108],"strengthens":[110],"capability":[113],"interpret":[115],"textual":[116],"information":[117],"within":[118],"context.":[121],"Comprehensive":[122],"evaluations":[123],"on":[124],"benchmark":[125],"datasets":[126],"such":[127],"as":[128],"VSPW":[129],"and":[130],"Cityscapes":[131],"highlight":[132],"OV-VSS's":[133],"zero-shot":[134],"generalization":[135],"capabilities,":[136],"especially":[137],"handling":[139],"The":[142],"results":[143],"validate":[144],"OV2VSS's":[145],"effectiveness,":[146],"demonstrating":[147],"improved":[148],"performance":[149],"tasks":[153],"diverse":[155],"datasets.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
