{"id":"https://openalex.org/W4386556541","doi":"https://doi.org/10.48550/arxiv.2309.01391","title":"SSVOD: Semi-Supervised Video Object Detection with Sparse Annotations","display_name":"SSVOD: Semi-Supervised Video Object Detection with Sparse Annotations","publication_year":2023,"publication_date":"2023-09-04","ids":{"openalex":"https://openalex.org/W4386556541","doi":"https://doi.org/10.48550/arxiv.2309.01391"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.01391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.01391","pdf_url":"https://arxiv.org/pdf/2309.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.01391","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014251585","display_name":"Tanvir Mahmud","orcid":"https://orcid.org/0000-0003-0529-2826"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahmud, Tanvir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080469470","display_name":"Chun-Hao Liu","orcid":"https://orcid.org/0000-0002-2368-6266"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chun-Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031984988","display_name":"Burhaneddin Yaman","orcid":"https://orcid.org/0000-0003-0791-5900"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaman, Burhaneddin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5065985595","display_name":"Diana Marculescu","orcid":"https://orcid.org/0000-0002-5734-4221"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marculescu, Diana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8316012620925903},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7344183325767517},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.579336941242218},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5600665807723999},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5313447117805481},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5248575210571289},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5023164749145508},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4873700737953186},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4675026834011078},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4455455541610718},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.442465215921402},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.43935665488243103},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3613168001174927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8316012620925903},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7344183325767517},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.579336941242218},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5600665807723999},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5313447117805481},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5248575210571289},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5023164749145508},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4873700737953186},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4675026834011078},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4455455541610718},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.442465215921402},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43935665488243103},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3613168001174927},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.01391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.01391","pdf_url":"https://arxiv.org/pdf/2309.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2309.01391","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.01391","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.01391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.01391","pdf_url":"https://arxiv.org/pdf/2309.01391","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386556541.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W4321784794"],"abstract_inverted_index":{"Despite":[0,40],"significant":[1,181],"progress":[2],"in":[3,45,80,165],"semi-supervised":[4,66,90],"learning":[5],"for":[6,18,26,50,125,148],"image":[7],"object":[8,20,29,92],"detection,":[9],"several":[10],"key":[11],"issues":[12],"are":[13],"yet":[14],"to":[15,101,144],"be":[16,198],"addressed":[17],"video":[19,28,57,91],"detection:":[21],"(1)":[22],"Achieving":[23],"good":[24],"performance":[25,182],"supervised":[27],"detection":[30,93],"greatly":[31],"depends":[32],"on":[33,68,187],"the":[34,74],"availability":[35],"of":[36,54,99,116,141,173],"annotated":[37],"frames.":[38],"(2)":[39],"having":[41],"large":[42,52],"inter-frame":[43],"correlations":[44],"a":[46,51,139,157],"video,":[47],"collecting":[48],"annotations":[49],"number":[53],"frames":[55,105,124],"per":[56],"is":[58],"expensive,":[59],"time-consuming,":[60],"and":[61,133,151,162,175,190,194],"often":[62],"redundant.":[63],"(3)":[64],"Existing":[65],"techniques":[67],"static":[69],"images":[70],"can":[71],"hardly":[72],"exploit":[73],"temporal":[75],"motion":[76,97],"dynamics":[77,98],"inherently":[78],"present":[79],"videos.":[81],"In":[82,128],"this":[83],"paper,":[84],"we":[85,118,130,167],"introduce":[86,119,131],"SSVOD,":[87],"an":[88],"end-to-end":[89],"framework":[94],"that":[95],"exploits":[96],"videos":[100],"utilize":[102],"large-scale":[103],"unlabeled":[104],"with":[106],"sparse":[107],"annotations.":[108],"To":[109,155],"selectively":[110],"assemble":[111],"robust":[112,146],"pseudo-labels":[113,147],"across":[114],"groups":[115],"frames,":[117],"\\textit{flow-warped":[120],"predictions}":[121],"from":[122],"nearby":[123],"temporal-consistency":[126],"estimation.":[127],"particular,":[129],"cross-IoU":[132],"cross-divergence":[134],"based":[135,171],"selection":[136],"methods":[137,186],"over":[138,184],"set":[140],"estimated":[142],"predictions":[143],"include":[145],"bounding":[149],"boxes":[150],"class":[152],"labels,":[153],"respectively.":[154],"strike":[156],"balance":[158],"between":[159],"confirmation":[160],"bias":[161],"uncertainty":[163],"noise":[164],"pseudo-labels,":[166],"propose":[168],"confidence":[169],"threshold":[170],"combination":[172],"hard":[174],"soft":[176],"pseudo-labels.":[177],"Our":[178],"method":[179],"achieves":[180],"improvements":[183],"existing":[185],"ImageNet-VID,":[188],"Epic-KITCHENS,":[189],"YouTube-VIS":[191],"datasets.":[192],"Code":[193],"pre-trained":[195],"models":[196],"will":[197],"released.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
