{"id":"https://openalex.org/W4379928661","doi":"https://doi.org/10.1109/tcsvt.2023.3284165","title":"Unified Multi-Modality Video Object Segmentation Using Reinforcement Learning","display_name":"Unified Multi-Modality Video Object Segmentation Using Reinforcement Learning","publication_year":2023,"publication_date":"2023-06-08","ids":{"openalex":"https://openalex.org/W4379928661","doi":"https://doi.org/10.1109/tcsvt.2023.3284165"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3284165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284165","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017171970","display_name":"Mingjie Sun","orcid":"https://orcid.org/0000-0002-3697-7927"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingjie Sun","raw_affiliation_strings":["School of Computer Science and Technology, Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011918180","display_name":"Jimin Xiao","orcid":"https://orcid.org/0000-0002-9416-2486"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jimin Xiao","raw_affiliation_strings":["School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033755146","display_name":"Eng Gee Lim","orcid":"https://orcid.org/0000-0003-0199-7386"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Eng Gee Lim","raw_affiliation_strings":["School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064237960","display_name":"Cairong Zhao","orcid":"https://orcid.org/0000-0001-6745-9674"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cairong Zhao","raw_affiliation_strings":["College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100362745","display_name":"Yao Zhao","orcid":"https://orcid.org/0000-0002-8581-9554"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhao","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5017171970"],"corresponding_institution_ids":["https://openalex.org/I3923682"],"apc_list":null,"apc_paid":null,"fwci":1.9647,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88301433,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"34","issue":"8","first_page":"6722","last_page":"6734"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7204650640487671},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6878708600997925},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6415179967880249},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6352286338806152},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.529181182384491},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4999241828918457},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4893330931663513},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4553026556968689}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204650640487671},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6878708600997925},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6415179967880249},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6352286338806152},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.529181182384491},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4999241828918457},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4893330931663513},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4553026556968689}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3284165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284165","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1794522962","display_name":null,"funder_award_id":"61972323","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":85,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1614298861","https://openalex.org/W2108598243","https://openalex.org/W2213486648","https://openalex.org/W2250539671","https://openalex.org/W2250861254","https://openalex.org/W2470139095","https://openalex.org/W2606746036","https://openalex.org/W2799157347","https://openalex.org/W2799256316","https://openalex.org/W2804284560","https://openalex.org/W2890447039","https://openalex.org/W2915683453","https://openalex.org/W2916743882","https://openalex.org/W2921536280","https://openalex.org/W2962825871","https://openalex.org/W2962942822","https://openalex.org/W2963150697","https://openalex.org/W2963253279","https://openalex.org/W2963732700","https://openalex.org/W2964218467","https://openalex.org/W2964345792","https://openalex.org/W2964665981","https://openalex.org/W2970724283","https://openalex.org/W2983335573","https://openalex.org/W2986357018","https://openalex.org/W2990205821","https://openalex.org/W3027731085","https://openalex.org/W3034538699","https://openalex.org/W3034728521","https://openalex.org/W3034798428","https://openalex.org/W3035042565","https://openalex.org/W3035080658","https://openalex.org/W3092704883","https://openalex.org/W3104844437","https://openalex.org/W3106773277","https://openalex.org/W3108188579","https://openalex.org/W3108819577","https://openalex.org/W3110030584","https://openalex.org/W3117097536","https://openalex.org/W3126391825","https://openalex.org/W3154169267","https://openalex.org/W3160550216","https://openalex.org/W3166166117","https://openalex.org/W3168984673","https://openalex.org/W3171098737","https://openalex.org/W3173332661","https://openalex.org/W3173993018","https://openalex.org/W3175132347","https://openalex.org/W3175373394","https://openalex.org/W3176232375","https://openalex.org/W3177322837","https://openalex.org/W3178075329","https://openalex.org/W3182736162","https://openalex.org/W3183673520","https://openalex.org/W3192871594","https://openalex.org/W3200949949","https://openalex.org/W3203187896","https://openalex.org/W3209806402","https://openalex.org/W4205421564","https://openalex.org/W4225575592","https://openalex.org/W4283796148","https://openalex.org/W4283827327","https://openalex.org/W4287653886","https://openalex.org/W4296079457","https://openalex.org/W4312313652","https://openalex.org/W4312528117","https://openalex.org/W4312734171","https://openalex.org/W4313171270","https://openalex.org/W4385245566","https://openalex.org/W4390874575","https://openalex.org/W6631190155","https://openalex.org/W6636510571","https://openalex.org/W6683195989","https://openalex.org/W6683935339","https://openalex.org/W6754033419","https://openalex.org/W6759534164","https://openalex.org/W6783869432","https://openalex.org/W6784639621","https://openalex.org/W6784713722","https://openalex.org/W6792111848","https://openalex.org/W6796280812","https://openalex.org/W6796505553","https://openalex.org/W6796524941","https://openalex.org/W6797112455"],"related_works":["https://openalex.org/W2385859805","https://openalex.org/W2530972254","https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W1522196789"],"abstract_inverted_index":{"The":[0,166],"main":[1],"task":[2],"we":[3],"aim":[4],"to":[5,34,41,104,128],"tackle":[6],"is":[7,32,68,126],"the":[8,26,36,42,71,106,111,141],"multi-modality":[9,117],"video":[10],"object":[11],"segmentation":[12,101],"(VOS),":[13],"which":[14,93],"can":[15],"be":[16,169],"divided":[17],"into":[18,79,90],"two":[19,59],"sub-tasks:":[20],"mask-referred":[21],"and":[22,73,85,87,109,123,156],"language-referred":[23],"VOS,":[24],"where":[25,70],"first-frame":[27],"mask-level":[28],"or":[29,113],"language-level":[30],"label":[31],"utilized":[33],"provide":[35],"target":[37],"information,":[38],"respectively.":[39,165],"Due":[40],"huge":[43],"gap":[44],"between":[45],"different":[46],"modalities,":[47],"existing":[48],"works":[49],"never":[50],"come":[51],"up":[52],"with":[53],"a":[54,65,80,98],"unified":[55,66,116],"framework":[56,67],"for":[57],"these":[58],"sub-tasks.":[60],"In":[61],"this":[62],"work,":[63],"such":[64,130],"designed,":[69,122],"visual":[72],"linguistic":[74],"inputs":[75],"are":[76,94,120,138],"first":[77],"spilt":[78],"number":[81],"of":[82,145,158],"image":[83],"patches":[84],"words,":[86],"then":[88],"mapped":[89],"same-size":[91],"tokens,":[92],"equally":[95],"processed":[96],"by":[97,140],"self-attention":[99],"based":[100],"model.":[102],"Furthermore,":[103],"highlight":[105],"significant":[107],"information":[108],"discard":[110],"non-target":[112],"ambiguous":[114],"one,":[115],"filter":[118],"networks":[119],"further":[121],"reinforcement":[124],"learning":[125],"adopted":[127],"optimize":[129],"networks.":[131],"Experiments":[132],"show":[133],"that":[134],"new":[135],"state-of-the-art":[136],"performances":[137],"achieved":[139],"proposed":[142],"method:":[143],"52.8%":[144],"<italic":[146,150,159],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[147,151,160],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">J</i>":[148],"&":[149],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">F</i>":[152],"on":[153,162],"Ref-YoutubeVOS":[154],"dataset":[155],"83.2%":[157],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">J<sub>S</sub></i>":[161],"YoutubeVOS":[163],"dataset,":[164],"code":[167],"will":[168],"released.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
