{"id":"https://openalex.org/W4414603932","doi":"https://doi.org/10.1109/tpami.2025.3612474","title":"Semantic-Assisted Object Clustering for Multi-Modal Referring Video Segmentation","display_name":"Semantic-Assisted Object Clustering for Multi-Modal Referring Video Segmentation","publication_year":2025,"publication_date":"2025-09-29","ids":{"openalex":"https://openalex.org/W4414603932","doi":"https://doi.org/10.1109/tpami.2025.3612474","pmid":"https://pubmed.ncbi.nlm.nih.gov/41021938"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3612474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3612474","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.qub.ac.uk/en/publications/1e8b4599-94eb-48b6-9a54-f1cd40fdae84","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101685090","display_name":"Yong Liu","orcid":"https://orcid.org/0009-0000-3078-1598"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yong Liu","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104165079","display_name":"Zhuoyan Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuoyan Luo","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046633330","display_name":"Yicheng Xiao","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yicheng Xiao","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100650009","display_name":"Yitong Wang","orcid":"https://orcid.org/0000-0002-7559-4152"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yitong Wang","raw_affiliation_strings":["ByteDance Inc at Shenzhen, Shenzhen, China","ByteDance Inc, Shenzhen"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"ByteDance Inc, Shenzhen","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669301","display_name":"Shuyan Li","orcid":"https://orcid.org/0000-0002-5107-0338"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shuyan Li","raw_affiliation_strings":["Queen&#x2019;s University Belfast, Belfast, U.K","Queen&#x0027;s University Belfast, U.K"],"affiliations":[{"raw_affiliation_string":"Queen&#x2019;s University Belfast, Belfast, U.K","institution_ids":["https://openalex.org/I126231945"]},{"raw_affiliation_string":"Queen&#x0027;s University Belfast, U.K","institution_ids":["https://openalex.org/I126231945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100754504","display_name":"Xiu Li","orcid":"https://orcid.org/0000-0003-0403-1923"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Li","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020953714","display_name":"Yujiu Yang","orcid":"https://orcid.org/0000-0002-6427-1024"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujiu Yang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101926293","display_name":"Yansong Tang","orcid":"https://orcid.org/0000-0002-1534-4549"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yansong Tang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","Tsinghua Shenzhen International Graduate School, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101685090"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25629437,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"48","issue":"1","first_page":"572","last_page":"590"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5946000218391418},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.49559998512268066},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45910000801086426},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.45489999651908875},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.44440001249313354},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.4122999906539917},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4025000035762787},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.3953999876976013},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3824000060558319},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.3783999979496002}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8629000186920166},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7502999901771545},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5946000218391418},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5633999705314636},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.49559998512268066},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45910000801086426},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.45489999651908875},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.44440001249313354},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4025000035762787},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3824000060558319},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.3783999979496002},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3481999933719635},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.3384999930858612},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3384000062942505},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C106752470","wikidata":"https://www.wikidata.org/wiki/Q1364826","display_name":"Joint entropy","level":3,"score":0.2858000099658966},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.25839999318122864},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.25279998779296875},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.25029999017715454}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tpami.2025.3612474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3612474","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41021938","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41021938","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null},{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/1e8b4599-94eb-48b6-9a54-f1cd40fdae84","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/1e8b4599-94eb-48b6-9a54-f1cd40fdae84","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Liu, Y, Luo, Z, Xiao, Y, Wang, Y, Li, S, Li, X, Yang, Y & Tang, Y 2026, 'Semantic-assisted object clustering for multi-modal referring video segmentation', IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 48, no. 1, pp. 572-590. https://doi.org/10.1109/TPAMI.2025.3612474","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/1e8b4599-94eb-48b6-9a54-f1cd40fdae84","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/1e8b4599-94eb-48b6-9a54-f1cd40fdae84","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Liu, Y, Luo, Z, Xiao, Y, Wang, Y, Li, S, Li, X, Yang, Y & Tang, Y 2026, 'Semantic-assisted object clustering for multi-modal referring video segmentation', IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 48, no. 1, pp. 572-590. https://doi.org/10.1109/TPAMI.2025.3612474","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2275985752","display_name":null,"funder_award_id":"62576191","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6806237773","display_name":null,"funder_award_id":"62206153","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W2125637308","https://openalex.org/W2222512263","https://openalex.org/W2251512949","https://openalex.org/W2302548814","https://openalex.org/W2395611524","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2606746036","https://openalex.org/W2948080074","https://openalex.org/W2962942822","https://openalex.org/W2963354481","https://openalex.org/W2964284374","https://openalex.org/W2980088508","https://openalex.org/W2990152177","https://openalex.org/W2997063389","https://openalex.org/W3034325957","https://openalex.org/W3034692043","https://openalex.org/W3034777757","https://openalex.org/W3034804856","https://openalex.org/W3035097537","https://openalex.org/W3096609285","https://openalex.org/W3104844437","https://openalex.org/W3122784054","https://openalex.org/W3169367294","https://openalex.org/W3170630188","https://openalex.org/W3178075329","https://openalex.org/W3201770677","https://openalex.org/W3215899623","https://openalex.org/W3216551675","https://openalex.org/W4200631575","https://openalex.org/W4225495512","https://openalex.org/W4226024706","https://openalex.org/W4301621763","https://openalex.org/W4307504011","https://openalex.org/W4312458986","https://openalex.org/W4312560592","https://openalex.org/W4312598093","https://openalex.org/W4312690830","https://openalex.org/W4312732475","https://openalex.org/W4312845411","https://openalex.org/W4312868348","https://openalex.org/W4313123347","https://openalex.org/W4379984073","https://openalex.org/W4383899959","https://openalex.org/W4385245566","https://openalex.org/W4386075493","https://openalex.org/W4390017901","https://openalex.org/W4390871743","https://openalex.org/W4390871886","https://openalex.org/W4390872515","https://openalex.org/W4390872914","https://openalex.org/W4390873204","https://openalex.org/W4390873614","https://openalex.org/W4390874496","https://openalex.org/W4390874575","https://openalex.org/W4391547535","https://openalex.org/W4393178550","https://openalex.org/W4394593033","https://openalex.org/W4400032968","https://openalex.org/W4400579078","https://openalex.org/W4401990337","https://openalex.org/W4402702940","https://openalex.org/W4402703023","https://openalex.org/W4402716107","https://openalex.org/W4402727124","https://openalex.org/W4402727770","https://openalex.org/W4402753756","https://openalex.org/W4402754134","https://openalex.org/W4402754173","https://openalex.org/W4402773761","https://openalex.org/W4402774102","https://openalex.org/W4402774453","https://openalex.org/W4403940533","https://openalex.org/W4404544979","https://openalex.org/W4404769790","https://openalex.org/W4409368383","https://openalex.org/W4413145095"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"concentrates":[2],"on":[3,264,276,286],"Multi-modal":[4],"Referring":[5],"Video":[6],"Segmentation":[7],"task,":[8],"where":[9],"a":[10,38,45,142,155,248,280],"well":[11],"optimized":[12],"model":[13,34],"is":[14,147,217],"able":[15],"to":[16,52,63,77,84,96,136,149,192,220,231],"recognize":[17],"and":[18,100,120,133,169,239,270,293],"segment":[19],"the":[20,25,72,98,103,110,121,180,199,203,227,233,240,284,290],"target":[21,81,184],"objects":[22,185],"referred":[23],"by":[24,279],"given":[26],"guidance":[27],"signals,":[28],"e.g.,":[29],"language":[30],"description.":[31],"Early":[32],"approaches":[33],"this":[35,125,210],"task":[36],"as":[37],"sequence":[39],"prediction":[40],"problem.":[41],"The":[42],"lack":[43],"of":[44,48,105,183,202,235,243,257,295],"global":[46,106],"view":[47],"video":[49,195,228,237,267],"content":[50],"leads":[51],"difficulties":[53],"in":[54,124,186,209,298],"effectively":[55],"utilizing":[56],"inter-frame":[57,188],"relationships.":[58],"Some":[59],"recent":[60],"works":[61],"propose":[62,191],"perform":[64,150,193],"temporal":[65,287,303],"modeling":[66],"with":[67,102,159,206,302],"vanilla":[68],"attention":[69],"mechanism.":[70],"However,":[71],"condensed":[73],"visual":[74,176],"representation":[75,182],"tends":[76],"be":[78],"messy":[79],"about":[80],"information":[82,238],"due":[83],"occlusion":[85],"or":[86],"motion":[87],"blur.":[88],"Unlimited":[89],"non-local":[90],"operation":[91],"would":[92,178],"spread":[93],"such":[94],"noise":[95],"all":[97,277],"sequences":[99],"interfere":[101],"extraction":[104],"representations.":[107],"To":[108],"address":[109],"above":[111],"issue,":[112],"we":[113,190,246],"present":[114],"Semantic-assisted":[115],"Object":[116],"Cluster":[117],"network":[118],"(SOC)":[119],"improved":[122],"SOC++":[123],"paper.":[126],"Our":[127],"method":[128,272,297],"unifies":[129],"temporally":[130],"selective":[131],"interaction":[132],"cross-modal":[134],"alignment":[135],"achieve":[137],"video-level":[138],"understanding.":[139],"In":[140],"SOC++,":[141],"proxy-assisted":[143],"multi-modal":[144,213],"fusion":[145,251],"module":[146,158,252],"introduced":[148],"preliminary":[151],"bidirectional":[152],"activation.":[153],"Then":[154],"semantic":[156],"integration":[157],"progressive":[160],"frame-to-video":[161],"structure":[162],"facilitates":[163],"joint":[164,224,255],"space":[165,225],"learning":[166],"across":[167],"modalities":[168],"time":[170],"steps.":[171],"Considering":[172],"that":[173,253],"potential":[174],"noisy":[175],"embeddings":[177],"impair":[179],"overall":[181],"unconstrained":[187],"interactions,":[189],"tendentious":[194],"aggregation":[196],"through":[197],"emphasizing":[198],"indicative":[200],"role":[201],"informative":[204],"frames":[205],"lower":[207],"entropy":[208],"part.":[211],"A":[212],"query":[214,250],"contrastive":[215],"supervision":[216],"also":[218],"utilized":[219],"help":[221],"construct":[222],"well-aligned":[223],"at":[226],"level.":[229],"Moreover,":[230],"integrate":[232],"advantage":[234],"high-level":[236],"low-level":[241],"details":[242],"each":[244],"frame,":[245],"introduce":[247],"dynamic":[249],"performs":[254],"updating":[256],"these":[258],"embeddings.":[259],"We":[260],"conduct":[261],"extensive":[262],"experiments":[263],"popular":[265],"referring":[266],"segmentation":[268,291],"benchmarks,":[269],"our":[271,296],"outperforms":[273],"state-of-the-art":[274],"competitors":[275],"benchmarks":[278],"remarkable":[281],"margin.":[282],"Besides,":[283],"emphasis":[285],"coherence":[288],"enhances":[289],"stability":[292],"adaptability":[294],"processing":[299],"text":[300],"expressions":[301],"variations..":[304]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
