{"id":"https://openalex.org/W4402917031","doi":"https://doi.org/10.1109/cvprw63382.2024.00274","title":"UVIS: Unsupervised Video Instance Segmentation","display_name":"UVIS: Unsupervised Video Instance Segmentation","publication_year":2024,"publication_date":"2024-06-17","ids":{"openalex":"https://openalex.org/W4402917031","doi":"https://doi.org/10.1109/cvprw63382.2024.00274"},"language":"en","primary_location":{"id":"doi:10.1109/cvprw63382.2024.00274","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvprw63382.2024.00274","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064995424","display_name":"Shuaiyi Huang","orcid":"https://orcid.org/0000-0003-0555-2077"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shuaiyi Huang","raw_affiliation_strings":["University of Maryland,College Park"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College Park","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038235276","display_name":"Saksham Suri","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Saksham Suri","raw_affiliation_strings":["University of Maryland,College Park"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College Park","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102727211","display_name":"Kamal Gupta","orcid":"https://orcid.org/0000-0003-0531-2735"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kamal Gupta","raw_affiliation_strings":["University of Maryland,College Park"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College Park","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088236196","display_name":"Sai Saketh Rambhatla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sai Saketh Rambhatla","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029216091","display_name":"Ser-nam Lim","orcid":null},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ser-nam Lim","raw_affiliation_strings":["University of Central Florida"],"affiliations":[{"raw_affiliation_string":"University of Central Florida","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101614443","display_name":"Abhinav Shrivastava","orcid":"https://orcid.org/0000-0001-8928-8554"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abhinav Shrivastava","raw_affiliation_strings":["University of Maryland,College Park"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College Park","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5064995424"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":1.8421,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86958471,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2682","last_page":"2692"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9690999984741211,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7383348345756531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5915354490280151},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5865769386291504},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5061308741569519},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.42211633920669556},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34657132625579834}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7383348345756531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5915354490280151},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5865769386291504},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5061308741569519},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42211633920669556},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34657132625579834}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvprw63382.2024.00274","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvprw63382.2024.00274","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2156406284","https://openalex.org/W2194775991","https://openalex.org/W2470139095","https://openalex.org/W2603203130","https://openalex.org/W2889986507","https://openalex.org/W2919167158","https://openalex.org/W2962867364","https://openalex.org/W2962914239","https://openalex.org/W2982121679","https://openalex.org/W2982723417","https://openalex.org/W3108043693","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W3169933013","https://openalex.org/W3170511209","https://openalex.org/W3171007011","https://openalex.org/W3171516518","https://openalex.org/W3173326549","https://openalex.org/W3173980723","https://openalex.org/W3175890016","https://openalex.org/W3191142899","https://openalex.org/W3202509201","https://openalex.org/W3203111377","https://openalex.org/W3204171527","https://openalex.org/W3205239453","https://openalex.org/W3212555189","https://openalex.org/W4214613769","https://openalex.org/W4214861803","https://openalex.org/W4221161778","https://openalex.org/W4312400547","https://openalex.org/W4312420092","https://openalex.org/W4312960937","https://openalex.org/W4313007081","https://openalex.org/W4313156423","https://openalex.org/W4319300199","https://openalex.org/W4319301005","https://openalex.org/W4385804776","https://openalex.org/W4386071623","https://openalex.org/W4386071704","https://openalex.org/W4386075882","https://openalex.org/W4386113253","https://openalex.org/W4390872702","https://openalex.org/W6735925877","https://openalex.org/W6759534164","https://openalex.org/W6761973954","https://openalex.org/W6765150368","https://openalex.org/W6779977557","https://openalex.org/W6791353385","https://openalex.org/W6791742336","https://openalex.org/W6796494355","https://openalex.org/W6800493387","https://openalex.org/W6800751262","https://openalex.org/W6809716307","https://openalex.org/W6811470611","https://openalex.org/W6843861238","https://openalex.org/W6846835116"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0,30],"instance":[1,39],"segmentation":[2,40],"requires":[3],"classifying,":[4],"segmenting,":[5],"and":[6,66,93,123,147],"tracking":[7,125],"every":[8],"object":[9,133],"across":[10],"video":[11,38,43,159],"frames.":[12],"Unlike":[13],"existing":[14],"approaches":[15],"that":[16,35],"rely":[17],"on":[18,139,155],"masks,":[19],"boxes,":[20],"or":[21,45,161],"category":[22],"labels,":[23],"we":[24,107],"propose":[25],"UVIS,":[26],"a":[27,109,115,124],"novel":[28],"Unsupervised":[29],"Instance":[31],"Segmentation":[32],"(UVIS)":[33],"framework":[34,80],"can":[36],"perform":[37],"without":[41,157],"any":[42,158],"annotations":[44,160],"dense":[46,56,162],"label-based":[47],"pretraining.":[48],"Our":[49,78,150],"key":[50],"insight":[51],"comes":[52],"from":[53,59,71],"leveraging":[54],"the":[55,60,67,72,98,104,165],"shape":[57],"prior":[58],"self-supervised":[61],"vision":[62],"foundation":[63],"model":[64,76,91],"DINO":[65],"open-set":[68],"recognition":[69],"ability":[70],"image-caption":[73],"supervised":[74],"vision-language":[75],"CLIP.":[77],"UVIS":[79,151],"consists":[81],"of":[82,100,167],"three":[83,140],"essential":[84],"steps:":[85],"frame-level":[86],"pseudo-label":[87],"generation,":[88],"transformer-based":[89],"VIS":[90,101,142,170],"training,":[92],"query-based":[94],"tracking.":[95],"To":[96],"improve":[97],"quality":[99],"predictions":[102],"in":[103,132],"unsupervised":[105,169],"setup,":[106],"introduce":[108],"dual-memory":[110],"design.":[111],"This":[112],"design":[113],"includes":[114],"semantic":[116],"memory":[117,126],"bank":[118,127],"for":[119,128],"generating":[120],"accurate":[121],"pseudo-labels":[122],"maintaining":[129],"temporal":[130],"consistency":[131],"tracks.":[134],"We":[135],"evaluate":[136],"our":[137,168],"approach":[138],"standard":[141],"benchmarks,":[143],"namely":[144],"YoutubeVIS-2019,":[145],"YoutubeVIS-2021,":[146],"Occluded":[148],"VIS.":[149],"achieves":[152],"21.1":[153],"AP":[154],"YoutubeVIS-2019":[156],"pretraining,":[163],"demonstrating":[164],"potential":[166],"framework.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
