{"id":"https://openalex.org/W4220869836","doi":"https://doi.org/10.1109/tip.2022.3161832","title":"Object-Agnostic Transformers for Video Referring Segmentation","display_name":"Object-Agnostic Transformers for Video Referring Segmentation","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4220869836","doi":"https://doi.org/10.1109/tip.2022.3161832","pmid":"https://pubmed.ncbi.nlm.nih.gov/35349441"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2022.3161832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3161832","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100729478","display_name":"Xu Yang","orcid":"https://orcid.org/0000-0002-0405-6816"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Yang","raw_affiliation_strings":["School of Electronic Engineering, Xidian University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-0405-6816","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663935","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0002-3048-8268"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["School of Electronic Engineering, Xidian University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-3048-8268","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078372093","display_name":"De Xie","orcid":"https://orcid.org/0000-0002-5535-0898"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"De Xie","raw_affiliation_strings":["School of Electronic Engineering, Xidian University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-5535-0898","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015874725","display_name":"Cheng Deng","orcid":"https://orcid.org/0000-0003-2620-3247"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Deng","raw_affiliation_strings":["School of Electronic Engineering, Xidian University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0003-2620-3247","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074103823","display_name":"Dacheng Tao","orcid":"https://orcid.org/0000-0001-7225-5449"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dacheng Tao","raw_affiliation_strings":["JD Explore Academy, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7225-5449","affiliations":[{"raw_affiliation_string":"JD Explore Academy, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8275,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.86216448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"31","issue":null,"first_page":"2839","last_page":"2849"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8488763570785522},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.729458212852478},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.722111165523529},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5577829480171204},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5486220121383667},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.5059806704521179},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4960962235927582},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4917488396167755},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.48580265045166016},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.48401781916618347},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.47316819429397583},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.42724549770355225},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4230271279811859}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8488763570785522},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.729458212852478},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.722111165523529},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5577829480171204},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5486220121383667},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.5059806704521179},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4960962235927582},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4917488396167755},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.48580265045166016},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.48401781916618347},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.47316819429397583},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.42724549770355225},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4230271279811859},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2022.3161832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3161832","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:35349441","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35349441","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G1372603746","display_name":null,"funder_award_id":"62171343","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1816562919","display_name":null,"funder_award_id":"2021ZDLGY01-03","funder_id":"https://openalex.org/F4320336350","funder_display_name":"Key Research and Development Projects of Shaanxi Province"},{"id":"https://openalex.org/G3509175749","display_name":null,"funder_award_id":"ZDRC2102","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4470460840","display_name":null,"funder_award_id":"2017YFE0104100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4642963363","display_name":null,"funder_award_id":"2021ZDLGY01-03","funder_id":"https://openalex.org/F4320335774","funder_display_name":"Key Technologies Research and Development Program"},{"id":"https://openalex.org/G5089867355","display_name":"\u57fa\u4e8e\u89c6\u89c9\u8bed\u8a00\u9a71\u52a8\u7684\u591a\u6a21\u6001\u8ba4\u77e5\u65b9\u6cd5\u7814\u7a76","funder_award_id":"62071361","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G986958842","display_name":null,"funder_award_id":"62132016","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335774","display_name":"Key Technologies Research and Development Program","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320336350","display_name":"Key Research and Development Projects of Shaanxi Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1905722737","https://openalex.org/W1924770834","https://openalex.org/W2034014085","https://openalex.org/W2251512949","https://openalex.org/W2302548814","https://openalex.org/W2470139095","https://openalex.org/W2507009361","https://openalex.org/W2618799552","https://openalex.org/W2734973448","https://openalex.org/W2746692960","https://openalex.org/W2747053578","https://openalex.org/W2896457183","https://openalex.org/W2904017099","https://openalex.org/W2908510526","https://openalex.org/W2953133772","https://openalex.org/W2963094665","https://openalex.org/W2963354481","https://openalex.org/W2963524571","https://openalex.org/W2963820951","https://openalex.org/W2964051877","https://openalex.org/W2966715458","https://openalex.org/W2968124245","https://openalex.org/W2981851019","https://openalex.org/W2983693499","https://openalex.org/W2997063389","https://openalex.org/W3014611590","https://openalex.org/W3015671815","https://openalex.org/W3033504500","https://openalex.org/W3034336960","https://openalex.org/W3034777757","https://openalex.org/W3090449556","https://openalex.org/W3092054443","https://openalex.org/W3124671614","https://openalex.org/W3126751243","https://openalex.org/W3164963798","https://openalex.org/W3171547673","https://openalex.org/W3172006205","https://openalex.org/W3189597199","https://openalex.org/W3212940248","https://openalex.org/W4214490042","https://openalex.org/W4293295238","https://openalex.org/W4295312788","https://openalex.org/W4385245566","https://openalex.org/W6640212811","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6766904570","https://openalex.org/W6766978945","https://openalex.org/W6767279747","https://openalex.org/W6775188310","https://openalex.org/W6786716802","https://openalex.org/W6797397777"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W4205302943","https://openalex.org/W2119949815","https://openalex.org/W2561132942","https://openalex.org/W2142795561","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2900482274"],"abstract_inverted_index":{"Video":[0],"referring":[1,112],"segmentation":[2,164,172],"focuses":[3],"on":[4,13,199],"segmenting":[5],"out":[6],"the":[7,14,82,115,130,178,191,194],"object":[8,118,142],"in":[9],"a":[10,96,146,161,182],"video":[11,111],"based":[12],"corresponding":[15],"textual":[16,133],"description.":[17],"Previous":[18],"works":[19],"have":[20],"primarily":[21],"tackled":[22],"this":[23,50],"task":[24,169],"by":[25],"devising":[26],"two":[27,46,200],"crucial":[28],"parts,":[29],"an":[30,37],"intra-modal":[31,106],"module":[32,39],"for":[33,40,110],"context":[34,58,150],"modeling":[35,59],"and":[36,60,84,107,135,151,155,173,205],"inter-modal":[38,108],"heterogeneous":[41,61],"alignment.":[42],"However,":[43],"there":[44],"are":[45,153],"essential":[47],"drawbacks":[48],"of":[49,57,86,117,132,180,193],"approach:":[51],"(1)":[52],"it":[53],"lacks":[54],"joint":[55],"learning":[56,109],"alignment,":[62],"leading":[63],"to":[64,77,166,188],"insufficient":[65],"interactions":[66],"among":[67],"input":[68],"elements;":[69],"(2)":[70],"both":[71],"modules":[72],"require":[73],"task-specific":[74],"expert":[75],"knowledge":[76],"design,":[78],"which":[79],"severely":[80],"limits":[81],"flexibility":[83],"generality":[85],"prior":[87],"methods.":[88,216],"To":[89],"address":[90],"these":[91],"problems,":[92],"we":[93,126],"here":[94],"propose":[95],"novel":[97,162],"Object-Agnostic":[98],"Transformer-based":[99],"Network,":[100],"called":[101],"OATNet,":[102],"that":[103,209],"simultaneously":[104,154],"conducts":[105],"segmentation,":[113],"without":[114],"aid":[116],"detection":[119],"or":[120],"category-specific":[121],"pixel":[122],"labeling.":[123],"More":[124],"specifically,":[125],"first":[127],"directly":[128],"feed":[129],"sequence":[131],"tokens":[134,137],"visual":[136],"(pixels":[138],"rather":[139],"than":[140],"detected":[141],"bounding":[143],"boxes)":[144],"into":[145,170],"multi-modal":[147],"encoder,":[148],"where":[149],"alignment":[152],"effectively":[156],"explored.":[157],"We":[158],"then":[159],"design":[160],"cascade":[163],"network":[165],"decouple":[167],"our":[168,210],"coarse-grained":[171],"fine-grained":[174],"refinement.":[175],"Moreover,":[176],"considering":[177],"difficulty":[179],"samples,":[181],"more":[183],"balanced":[184],"metric":[185],"is":[186],"provided":[187],"better":[189],"diagnose":[190],"performance":[192],"proposed":[195,211],"method.":[196],"Extensive":[197],"experiments":[198],"popular":[201],"datasets,":[202],"A2D":[203],"Sentences":[204],"J-HMDB":[206],"Sentences,":[207],"demonstrate":[208],"approach":[212],"noticeably":[213],"outperforms":[214],"state-of-the-art":[215]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-24T13:16:06.693445","created_date":"2025-10-10T00:00:00"}
