{"id":"https://openalex.org/W4399768906","doi":"https://doi.org/10.1109/tpami.2024.3415087","title":"Single-Frame Supervision for Spatio-Temporal Video Grounding","display_name":"Single-Frame Supervision for Spatio-Temporal Video Grounding","publication_year":2024,"publication_date":"2024-06-18","ids":{"openalex":"https://openalex.org/W4399768906","doi":"https://doi.org/10.1109/tpami.2024.3415087","pmid":"https://pubmed.ncbi.nlm.nih.gov/38889039"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3415087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3415087","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037741758","display_name":"K. J. Ray Liu","orcid":"https://orcid.org/0009-0001-3246-3275"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kun Liu","raw_affiliation_strings":["JD Logistics, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-3246-3275","affiliations":[{"raw_affiliation_string":"JD Logistics, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009938315","display_name":"Mengxue Qu","orcid":"https://orcid.org/0000-0001-9432-0205"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengxue Qu","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China","Beijing Jiaotong University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Jiaotong University, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100355962","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-6825-1936"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["JD Logistics, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6825-1936","affiliations":[{"raw_affiliation_string":"JD Logistics, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087043856","display_name":"Yunchao Wei","orcid":"https://orcid.org/0000-0002-2812-8781"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunchao Wei","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China","Beijing Jiaotong University, China"],"raw_orcid":"https://orcid.org/0000-0002-2812-8781","affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Jiaotong University, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015611759","display_name":"Wenming Zhe","orcid":"https://orcid.org/0000-0003-1753-5784"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenming Zhe","raw_affiliation_strings":["JD Logistics, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1753-5784","affiliations":[{"raw_affiliation_string":"JD Logistics, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100362745","display_name":"Yao Zhao","orcid":"https://orcid.org/0000-0002-8581-9554"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhao","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China","Beijing Jiaotong University, China"],"raw_orcid":"https://orcid.org/0000-0002-8581-9554","affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Jiaotong University, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068917997","display_name":"Wu Liu","orcid":"https://orcid.org/0000-0003-1633-7575"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Liu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China","University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0003-1633-7575","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5037741758"],"corresponding_institution_ids":["https://openalex.org/I4210103986"],"apc_list":null,"apc_paid":null,"fwci":0.7142,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69807129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"47","issue":"7","first_page":"5177","last_page":"5191"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7197167277336121},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.59647536277771},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5922355055809021},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5648524761199951},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.16194528341293335}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7197167277336121},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.59647536277771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5922355055809021},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5648524761199951},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.16194528341293335}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3415087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3415087","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38889039","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38889039","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Climate action","score":0.4399999976158142,"id":"https://metadata.un.org/sdg/13"}],"awards":[{"id":"https://openalex.org/G3944096777","display_name":null,"funder_award_id":"20220484063","funder_id":"https://openalex.org/F4320334978","funder_display_name":"Beijing Nova Program"}],"funders":[{"id":"https://openalex.org/F4320334978","display_name":"Beijing Nova Program","ror":"https://ror.org/034k14f91"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1927052826","https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2247513039","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2606473278","https://openalex.org/W2618799552","https://openalex.org/W2890502146","https://openalex.org/W2894280539","https://openalex.org/W2903901502","https://openalex.org/W2904824998","https://openalex.org/W2946086442","https://openalex.org/W2948958195","https://openalex.org/W2951323451","https://openalex.org/W2962766617","https://openalex.org/W2963017553","https://openalex.org/W2963109634","https://openalex.org/W2963393391","https://openalex.org/W2963521717","https://openalex.org/W2963534981","https://openalex.org/W2963662190","https://openalex.org/W2963795951","https://openalex.org/W2963843782","https://openalex.org/W2963916161","https://openalex.org/W2964089981","https://openalex.org/W2968101724","https://openalex.org/W2970401629","https://openalex.org/W2970898753","https://openalex.org/W2984121207","https://openalex.org/W2986803748","https://openalex.org/W2987734933","https://openalex.org/W2997429269","https://openalex.org/W3034772468","https://openalex.org/W3035049560","https://openalex.org/W3035590142","https://openalex.org/W3093034080","https://openalex.org/W3095669214","https://openalex.org/W3110435696","https://openalex.org/W3159619744","https://openalex.org/W3166712493","https://openalex.org/W3199096350","https://openalex.org/W3202825210","https://openalex.org/W3204090293","https://openalex.org/W3207454933","https://openalex.org/W4214490042","https://openalex.org/W4214759957","https://openalex.org/W4221166385","https://openalex.org/W4224314500","https://openalex.org/W4292092982","https://openalex.org/W4298364821","https://openalex.org/W4304091802","https://openalex.org/W4312805142","https://openalex.org/W4327852044","https://openalex.org/W4385804968","https://openalex.org/W4386047823","https://openalex.org/W4386076413","https://openalex.org/W4387968586","https://openalex.org/W4387969678","https://openalex.org/W4390189551","https://openalex.org/W4391941517","https://openalex.org/W6739901393","https://openalex.org/W6757135208","https://openalex.org/W6766673545","https://openalex.org/W6791353385","https://openalex.org/W6842959286","https://openalex.org/W7034702946"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Spatio-Temporal":[0],"Video":[1],"Grounding":[2],"(STVG)":[3],"aims":[4],"at":[5],"localizing":[6],"the":[7,25,63,78,82,87,92,95,113,133,152,193],"spatio-temporal":[8],"tube":[9],"of":[10,27,81,94,132,154],"a":[11,19,52,69,74,100,122,175,209],"specific":[12],"object":[13],"in":[14,40,46,148,170],"an":[15],"untrimmed":[16],"video":[17],"given":[18],"free-form":[20],"natural":[21],"language":[22],"query.":[23],"As":[24],"annotation":[26,248],"tubes":[28],"is":[29],"labor":[30,249],"intensive,":[31],"researchers":[32],"are":[33,136],"motivated":[34],"to":[35,116,125,144,184,192],"explore":[36],"weakly":[37],"supervised":[38,84],"approaches":[39],"recent":[41],"works,":[42],"which":[43,107],"usually":[44],"results":[45],"significant":[47],"performance":[48],"degradation.":[49],"To":[50,199],"achieve":[51],"less":[53],"expensive":[54],"STVG":[55,96],"method":[56,135],"with":[57,73,196,218,245],"acceptable":[58],"accuracy,":[59],"this":[60,204],"work":[61],"investigates":[62],"\"single-frame":[64],"supervision\"":[65],"paradigm":[66],"that":[67,179,228],"requires":[68],"single":[70],"frame":[71,115],"labeled":[72],"bounding":[75],"box":[76],"within":[77],"temporal":[79,187],"boundary":[80],"fully":[83],"counterpart":[85],"as":[86],"supervisory":[88],"signal.":[89],"Based":[90],"on":[91,203,215,224],"characteristics":[93],"problem,":[97],"we":[98,206],"propose":[99],"Two-Stage":[101],"Multiple":[102],"Instance":[103],"Learning":[104],"(T-SMILE)":[105],"method,":[106],"creates":[108],"pseudo":[109],"labels":[110],"by":[111,165],"expanding":[112],"annotated":[114],"its":[117],"contextual":[118],"frames,":[119],"thereby":[120,189],"establishing":[121],"fully-supervised":[123,242],"problem":[124],"facilitate":[126,200],"further":[127],"model":[128],"training.":[129],"The":[130,221],"innovations":[131],"proposed":[134],"three-folded,":[137],"including":[138],"1)":[139],"utilizing":[140],"multiple":[141],"instance":[142],"learning":[143,160,194],"dynamically":[145],"select":[146],"instances":[147],"positive":[149],"bags":[150],"for":[151],"recognition":[153],"starting":[155],"and":[156,172,186],"ending":[157],"timestamps,":[158],"2)":[159],"highly":[161],"discriminative":[162],"query":[163],"features":[164],"incorporating":[166],"spatial":[167,185],"prior":[168],"constraints":[169],"cross-attention,":[171],"3)":[173],"designing":[174],"curriculum":[176],"learning-based":[177],"strategy":[178],"iterative":[180],"assigns":[181],"dynamic":[182],"weights":[183],"branches,":[188],"gradually":[190],"adapting":[191],"branch":[195],"larger":[197],"difficulty.":[198],"future":[201],"research":[202],"task,":[205],"also":[207,237],"contribute":[208],"large-scale":[210],"benchmark":[211],"containing":[212],"12,469":[213],"videos":[214],"complex":[216],"scenes":[217],"single-frame":[219],"annotation.":[220],"extensive":[222],"experiments":[223],"two":[225],"benchmarks":[226],"demonstrate":[227],"T-SMILE":[229],"significantly":[230],"outperforms":[231],"all":[232],"weakly-supervised":[233],"methods.":[234],"Remarkably,":[235],"it":[236],"performs":[238],"better":[239],"than":[240],"some":[241],"methods":[243],"associated":[244],"much":[246],"more":[247],"costs.":[250]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
