{"id":"https://openalex.org/W4391428214","doi":"https://doi.org/10.1109/tcsvt.2024.3361076","title":"Learning Better Video Query With SAM for Video Instance Segmentation","display_name":"Learning Better Video Query With SAM for Video Instance Segmentation","publication_year":2024,"publication_date":"2024-02-01","ids":{"openalex":"https://openalex.org/W4391428214","doi":"https://doi.org/10.1109/tcsvt.2024.3361076"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3361076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3361076","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068398083","display_name":"Hao Fang","orcid":"https://orcid.org/0000-0002-8846-8294"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Fang","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114833240","display_name":"Tong Zhang","orcid":"https://orcid.org/0000-0001-8163-3050"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Zhang","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075052648","display_name":"Xiaofei Zhou","orcid":"https://orcid.org/0000-0002-7977-9728"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei Zhou","raw_affiliation_strings":["School of Automation, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100375447","display_name":"Xinxin Zhang","orcid":"https://orcid.org/0000-0001-6069-5391"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinxin Zhang","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5068398083"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":3.9198,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.94522103,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"35","issue":"4","first_page":"2963","last_page":"2974"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7721748352050781},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5806358456611633},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5464453101158142},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.4737689793109894},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.45378491282463074},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.451092004776001},{"id":"https://openalex.org/keywords/multiview-video-coding","display_name":"Multiview Video Coding","score":0.43437811732292175},{"id":"https://openalex.org/keywords/video-compression-picture-types","display_name":"Video compression picture types","score":0.4323032796382904},{"id":"https://openalex.org/keywords/video-post-processing","display_name":"Video post-processing","score":0.4235285818576813},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.35899144411087036}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7721748352050781},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5806358456611633},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5464453101158142},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.4737689793109894},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.45378491282463074},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.451092004776001},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.43437811732292175},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.4323032796382904},{"id":"https://openalex.org/C117090137","wikidata":"https://www.wikidata.org/wiki/Q7927977","display_name":"Video post-processing","level":5,"score":0.4235285818576813},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.35899144411087036}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3361076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3361076","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3191824907","display_name":null,"funder_award_id":"62101309","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7596411858","display_name":null,"funder_award_id":"62271180","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8613467077","display_name":null,"funder_award_id":"ZR2021QF109","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2222512263","https://openalex.org/W2939731335","https://openalex.org/W2962914239","https://openalex.org/W2963150697","https://openalex.org/W2979933490","https://openalex.org/W2982723417","https://openalex.org/W3010637453","https://openalex.org/W3034499084","https://openalex.org/W3048306538","https://openalex.org/W3091734938","https://openalex.org/W3096609285","https://openalex.org/W3106546328","https://openalex.org/W3109372619","https://openalex.org/W3110109236","https://openalex.org/W3131465228","https://openalex.org/W3132520841","https://openalex.org/W3138516171","https://openalex.org/W3139267983","https://openalex.org/W3160550216","https://openalex.org/W3162694035","https://openalex.org/W3169933013","https://openalex.org/W3171516518","https://openalex.org/W3173980723","https://openalex.org/W3182236906","https://openalex.org/W3192692200","https://openalex.org/W3199093552","https://openalex.org/W3202424564","https://openalex.org/W3202462044","https://openalex.org/W3202509201","https://openalex.org/W3212555189","https://openalex.org/W3212940248","https://openalex.org/W4214613769","https://openalex.org/W4214627427","https://openalex.org/W4214654781","https://openalex.org/W4221166385","https://openalex.org/W4225495512","https://openalex.org/W4293567872","https://openalex.org/W4312275238","https://openalex.org/W4312335509","https://openalex.org/W4312396403","https://openalex.org/W4312465615","https://openalex.org/W4312573566","https://openalex.org/W4313007081","https://openalex.org/W4313192411","https://openalex.org/W4320489250","https://openalex.org/W4367595576","https://openalex.org/W4385245566","https://openalex.org/W6757817989","https://openalex.org/W6784094891","https://openalex.org/W6784930956","https://openalex.org/W6796402304","https://openalex.org/W6796494355","https://openalex.org/W6796505553","https://openalex.org/W6797162190","https://openalex.org/W6798837711","https://openalex.org/W6804552856","https://openalex.org/W6809716307","https://openalex.org/W6811230874","https://openalex.org/W6839371072","https://openalex.org/W6839769395","https://openalex.org/W6843861238","https://openalex.org/W6846581650","https://openalex.org/W6850239385","https://openalex.org/W6850496707","https://openalex.org/W6851578965","https://openalex.org/W6852276098","https://openalex.org/W6852629184","https://openalex.org/W6852694211","https://openalex.org/W6853702739"],"related_works":["https://openalex.org/W1574724839","https://openalex.org/W1623638113","https://openalex.org/W2169150177","https://openalex.org/W2589628610","https://openalex.org/W2213155192","https://openalex.org/W2899854151","https://openalex.org/W3151022025","https://openalex.org/W1508427695","https://openalex.org/W2096476311","https://openalex.org/W2406608628"],"abstract_inverted_index":{"Recently,":[0],"Transformer-based":[1],"offline":[2,44],"video":[3,31,39,51,70,79,115,144,176],"instance":[4,23,71,128,156],"segmentation":[5,19,72,223,234],"(VIS)":[6],"solutions":[7],"have":[8],"made":[9],"significant":[10,59],"progress":[11],"by":[12,162],"decomposing":[13],"the":[14,28,85,132,135,141,166,180,220,233],"whole":[15,136],"task":[16],"into":[17],"global":[18],"map":[20],"generation":[21],"and":[22,62,130,206],"discrimination.":[24],"We":[25],"argue":[26],"that":[27,33,153],"quality":[29,142,175,221],"of":[30,118,134,143,169,222],"queries":[32,52,87,112,116,160],"represent":[34],"all":[35],"instances":[36],"in":[37,58,158],"a":[38,68,148,186,225],"clip":[40],"is":[41,229,237],"crucial":[42],"for":[43,88,214],"VIS":[45,183],"methods.":[46],"Existing":[47],"methods":[48],"typically":[49],"interact":[50],"with":[53,185],"dense":[54],"spatio-temporal":[55],"features,":[56],"resulting":[57],"computational":[60],"complexity":[61],"redundant":[63],"information.":[64],"Thus,":[65],"we":[66,82,100,146],"propose":[67,101,147],"novel":[69],"framework,":[73],"LBVQ,":[74],"dedicated":[75],"to":[76,113,139,231],"learning":[77,173],"better":[78],"queries.":[80],"Specifically,":[81],"first":[83],"obtain":[84],"frame":[86,90,111,159,161],"each":[89],"independently":[91],"without":[92],"any":[93],"complex":[94],"inter-frame":[95],"spatial-temporal":[96],"association":[97],"operations.":[98],"Secondly,":[99],"an":[102],"adaptive":[103],"query":[104,149],"initialization":[105,121,124],"module":[106,151],"(AQI),":[107],"which":[108],"adaptively":[109],"integrates":[110],"initialize":[114],"instead":[117],"traditional":[119],"random":[120],"strategies.":[122],"This":[123],"method":[125],"preserves":[126],"rich":[127],"clues":[129],"accelerates":[131],"optimization":[133],"model.":[137],"Finally,":[138],"enhance":[140],"queries,":[145,177],"propagation":[150],"(QPM)":[152],"captures":[154],"relevant":[155],"information":[157],"frame,":[163],"greatly":[164],"improving":[165],"model\u2019s":[167],"understanding":[168],"long":[170,215],"videos.":[171,216],"By":[172],"higher":[174],"LBVQ":[178,199],"achieves":[179,200],"state-of-the-art":[181],"on":[182,193,203,209],"benchmarks":[184],"ResNet-50":[187],"backbone:":[188],"52.2":[189],"AP,":[190],"44.8":[191],"AP":[192,202,208],"YouTube-VIS":[194,204],"2019":[195],"&":[196],"2021.":[197],"Moreover,":[198],"39.7":[201],"2022":[205],"22.2":[207],"OVIS,":[210],"demonstrating":[211],"superior":[212],"potential":[213],"To":[217],"further":[218],"improve":[219],"masks,":[224],"large-scale":[226],"pretrained":[227],"SAM":[228],"employed":[230],"refine":[232],"results.":[235],"Code":[236],"available":[238],"at":[239],"https://github.com/fanghaook/LBVQ.":[240]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":4}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
