{"id":"https://openalex.org/W4379984073","doi":"https://doi.org/10.1109/tcsvt.2023.3284979","title":"Decoupling Multimodal Transformers for Referring Video Object Segmentation","display_name":"Decoupling Multimodal Transformers for Referring Video Object Segmentation","publication_year":2023,"publication_date":"2023-06-09","ids":{"openalex":"https://openalex.org/W4379984073","doi":"https://doi.org/10.1109/tcsvt.2023.3284979"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3284979","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284979","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.whiterose.ac.uk/200270/1/FINAL_VERSION.PDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048649183","display_name":"Mingqi Gao","orcid":"https://orcid.org/0000-0002-8688-8228"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]},{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Mingqi Gao","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","Warwick Manufacturing Group, University of Warwick, Coventry, U.K"],"raw_orcid":"https://orcid.org/0000-0002-8688-8228","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Warwick Manufacturing Group, University of Warwick, Coventry, U.K","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101738258","display_name":"Jinyu Yang","orcid":"https://orcid.org/0000-0002-9765-9009"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]},{"id":"https://openalex.org/I79619799","display_name":"University of Birmingham","ror":"https://ror.org/03angcq70","country_code":"GB","type":"education","lineage":["https://openalex.org/I79619799"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Jinyu Yang","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","School of Computer Science, University of Birmingham, Birmingham, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"School of Computer Science, University of Birmingham, Birmingham, U.K","institution_ids":["https://openalex.org/I79619799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046605531","display_name":"Jungong Han","orcid":"https://orcid.org/0000-0003-4361-956X"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]},{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jungong Han","raw_affiliation_strings":["Warwick Manufacturing Group, University of Warwick, Coventry, U.K","Department of Computer Science, The University of Sheffield, Sheffield, U.K"],"raw_orcid":"https://orcid.org/0000-0003-4361-956X","affiliations":[{"raw_affiliation_string":"Warwick Manufacturing Group, University of Warwick, Coventry, U.K","institution_ids":["https://openalex.org/I39555362"]},{"raw_affiliation_string":"Department of Computer Science, The University of Sheffield, Sheffield, U.K","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072833759","display_name":"Ke L\u00fc","orcid":"https://orcid.org/0000-0003-0176-3088"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Lu","raw_affiliation_strings":["School of Engineering Science, University of Chinese Academy of Sciences, Beijing, China","Peng Cheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-0176-3088","affiliations":[{"raw_affiliation_string":"School of Engineering Science, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063285882","display_name":"Feng Zheng","orcid":"https://orcid.org/0000-0002-1701-9141"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zheng","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-1701-9141","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010581004","display_name":"Giovanni Montana","orcid":"https://orcid.org/0000-0003-3942-3900"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Giovanni Montana","raw_affiliation_strings":["Warwick Manufacturing Group, University of Warwick, Coventry, U.K"],"raw_orcid":"https://orcid.org/0000-0003-3942-3900","affiliations":[{"raw_affiliation_string":"Warwick Manufacturing Group, University of Warwick, Coventry, U.K","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3578,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.90582288,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"33","issue":"9","first_page":"4518","last_page":"4528"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8342177271842957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6452504396438599},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5839776396751404},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5803271532058716},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.48585769534111023},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4827655851840973},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43869149684906006},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4134308993816376},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.383070707321167},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.352057546377182},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3468698263168335}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8342177271842957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6452504396438599},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5839776396751404},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5803271532058716},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.48585769534111023},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4827655851840973},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43869149684906006},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4134308993816376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.383070707321167},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.352057546377182},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3468698263168335},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcsvt.2023.3284979","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284979","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:200270","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.whiterose.ac.uk/200270/1/FINAL_VERSION.PDF","source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:eprints.whiterose.ac.uk:200270","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.whiterose.ac.uk/200270/1/FINAL_VERSION.PDF","source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G7180131230","display_name":"\u9762\u5411\u591a\u6001\u5f02\u6784\u5927\u6570\u636e\u7684\u9ad8\u6548\u80fd\u68c0\u7d22\u7b97\u6cd5\u7814\u7a76","funder_award_id":"61972188","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7522395172","display_name":null,"funder_award_id":"2022YFF1202903","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8142145165","display_name":null,"funder_award_id":"62122035","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4379984073.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W2489434015","https://openalex.org/W2576546729","https://openalex.org/W2896457183","https://openalex.org/W2962942822","https://openalex.org/W2963109634","https://openalex.org/W2963354481","https://openalex.org/W2983693499","https://openalex.org/W2997063389","https://openalex.org/W3034777757","https://openalex.org/W3092462694","https://openalex.org/W3096609285","https://openalex.org/W3104844437","https://openalex.org/W3122784054","https://openalex.org/W3138516171","https://openalex.org/W3166396011","https://openalex.org/W3171516518","https://openalex.org/W3177892185","https://openalex.org/W3178075329","https://openalex.org/W3182236906","https://openalex.org/W3205021045","https://openalex.org/W3212456749","https://openalex.org/W3214685499","https://openalex.org/W3215899623","https://openalex.org/W4220869836","https://openalex.org/W4225495512","https://openalex.org/W4226024706","https://openalex.org/W4282919422","https://openalex.org/W4283796148","https://openalex.org/W4285191490","https://openalex.org/W4295312788","https://openalex.org/W4307504011","https://openalex.org/W4312438304","https://openalex.org/W4312690830","https://openalex.org/W4312956471","https://openalex.org/W4312981390","https://openalex.org/W4320169590","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6749526849","https://openalex.org/W6755207826","https://openalex.org/W6766978945","https://openalex.org/W6784094891","https://openalex.org/W6788885258","https://openalex.org/W6791353385","https://openalex.org/W6798505901","https://openalex.org/W6799085253","https://openalex.org/W6803953248","https://openalex.org/W6804095316","https://openalex.org/W6811387395","https://openalex.org/W6839745749"],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W3208297503","https://openalex.org/W2889153461","https://openalex.org/W3119773509","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353"],"abstract_inverted_index":{"Referring":[0],"Video":[1],"Object":[2],"Segmentation":[3],"(RVOS)":[4],"aims":[5],"to":[6,111,119],"segment":[7],"the":[8,41,51,55,58,76,81,92,96,100,139,145,153,164,171],"text-depicted":[9],"object":[10],"from":[11,95],"video":[12],"sequences.":[13],"With":[14],"excellent":[15],"capabilities":[16],"in":[17,28,67],"long-range":[18],"modelling":[19],"and":[20,45,102,122,130,134],"information":[21],"interaction,":[22,59],"transformers":[23],"have":[24],"been":[25],"increasingly":[26],"applied":[27],"existing":[29],"RVOS":[30,160],"architectures.":[31],"To":[32],"better":[33],"leverage":[34],"multimodal":[35,77],"data,":[36],"most":[37],"efforts":[38],"focus":[39],"on":[40,158],"interaction":[42,78],"between":[43],"visual":[44,114],"textual":[46,62],"features.":[47,115],"However,":[48],"they":[49],"ignore":[50],"syntactic":[52],"structures":[53],"of":[54,166,175],"text":[56,103],"during":[57],"where":[60],"all":[61,159],"components":[63],"are":[64,105],"intertwined,":[65],"resulting":[66],"ambiguous":[68],"vision-language":[69,146],"alignment.":[70,135],"In":[71],"this":[72],"paper,":[73],"we":[74,84,137],"improve":[75],"by":[79],"DECOUPLING":[80],"interweave.":[82],"Specifically,":[83],"train":[85],"a":[86],"lightweight":[87],"subject":[88,93,101],"perceptron,":[89],"which":[90],"extracts":[91],"part":[94],"input":[97],"text.":[98],"Then,":[99],"features":[104],"fed":[106],"into":[107,149],"two":[108],"parallel":[109],"branches":[110],"interact":[112],"with":[113],"This":[116],"enables":[117],"us":[118],"perform":[120],"subject-aware":[121],"context-aware":[123],"interactions,":[124],"respectively,":[125],"thus":[126],"encouraging":[127],"more":[128],"explicit":[129],"discriminative":[131],"feature":[132],"embedding":[133],"Moreover,":[136],"find":[138],"decoupled":[140],"architecture":[141],"also":[142],"facilitates":[143],"incorporating":[144],"pre-trained":[147],"alignment":[148],"RVOS,":[150],"further":[151],"improving":[152],"segmentation":[154],"performance.":[155],"Experimental":[156],"results":[157],"benchmark":[161],"datasets":[162],"demonstrate":[163],"superiority":[165],"our":[167,176],"proposed":[168],"method":[169,177],"over":[170],"state-of-the-arts.":[172],"The":[173],"code":[174],"is":[178],"available":[179],"at:":[180],"<uri":[181],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[182],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/gaomingqi/dmformer</uri>":[183],".":[184]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
