{"id":"https://openalex.org/W4394896927","doi":"https://doi.org/10.1109/tmm.2024.3387696","title":"Learning Feature Semantic Matching for Spatio-Temporal Video Grounding","display_name":"Learning Feature Semantic Matching for Spatio-Temporal Video Grounding","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4394896927","doi":"https://doi.org/10.1109/tmm.2024.3387696"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3387696","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3387696","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114833240","display_name":"Tong Zhang","orcid":"https://orcid.org/0000-0001-8163-3050"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tong Zhang","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068398083","display_name":"Hao Fang","orcid":"https://orcid.org/0000-0002-8846-8294"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Fang","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067846625","display_name":"Hao Zhang","orcid":"https://orcid.org/0000-0002-2725-6458"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hao Zhang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076814111","display_name":"Jialin Gao","orcid":"https://orcid.org/0000-0002-8554-7827"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jialin Gao","raw_affiliation_strings":["AI Singapore, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"AI Singapore, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081796777","display_name":"Xiankai Lu","orcid":"https://orcid.org/0000-0002-9543-6960"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiankai Lu","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019824207","display_name":"Xiushan Nie","orcid":"https://orcid.org/0000-0001-9644-9723"},"institutions":[{"id":"https://openalex.org/I44445938","display_name":"Shandong Jianzhu University","ror":"https://ror.org/01gbfax37","country_code":"CN","type":"education","lineage":["https://openalex.org/I44445938"]},{"id":"https://openalex.org/I4210144487","display_name":"Cloud Computing Center","ror":"https://ror.org/04aa0zm65","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210144487"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiushan Nie","raw_affiliation_strings":["Shandong Yunhai Guochuang Cloud Computing Equipment Industry Innovation Company, Ltd., Jinan, China","Shandong Yunhai Guochuang Cloud Computing Equipment Industry Innovation Co., Ltd. and the School of Computer Science and Technology, Shandong Jianzhu University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong Yunhai Guochuang Cloud Computing Equipment Industry Innovation Company, Ltd., Jinan, China","institution_ids":["https://openalex.org/I4210144487"]},{"raw_affiliation_string":"Shandong Yunhai Guochuang Cloud Computing Equipment Industry Innovation Co., Ltd. and the School of Computer Science and Technology, Shandong Jianzhu University, Jinan, China","institution_ids":["https://openalex.org/I44445938"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100672590","display_name":"Yilong Yin","orcid":"https://orcid.org/0000-0002-8465-1294"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yilong Yin","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5114833240"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":1.0399,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76254866,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"26","issue":null,"first_page":"9268","last_page":"9279"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8579589128494263},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5743837952613831},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5216758251190186},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4915209710597992},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4674098789691925},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33174335956573486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8579589128494263},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5743837952613831},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5216758251190186},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4915209710597992},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4674098789691925},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33174335956573486},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3387696","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3387696","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G210149415","display_name":null,"funder_award_id":"62176141","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4076825138","display_name":null,"funder_award_id":"62176139","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5394384052","display_name":null,"funder_award_id":"62106128","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5947912926","display_name":null,"funder_award_id":"ZR2021QF001","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"},{"id":"https://openalex.org/G6317468392","display_name":null,"funder_award_id":"tsqn202103088","funder_id":"https://openalex.org/F4320326189","funder_display_name":"Taishan Scholar Project of Shandong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null},{"id":"https://openalex.org/F4320326189","display_name":"Taishan Scholar Project of Shandong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2194775991","https://openalex.org/W2247513039","https://openalex.org/W2563032747","https://openalex.org/W2617576589","https://openalex.org/W2798354744","https://openalex.org/W2896457183","https://openalex.org/W2897628926","https://openalex.org/W2903901502","https://openalex.org/W2904910963","https://openalex.org/W2952524542","https://openalex.org/W2962703144","https://openalex.org/W2962869524","https://openalex.org/W2962974137","https://openalex.org/W2963017553","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2963521717","https://openalex.org/W2963843782","https://openalex.org/W2964089981","https://openalex.org/W2965373594","https://openalex.org/W2970231061","https://openalex.org/W2982065678","https://openalex.org/W2997429269","https://openalex.org/W3001091189","https://openalex.org/W3034743747","https://openalex.org/W3035049560","https://openalex.org/W3035265375","https://openalex.org/W3035590142","https://openalex.org/W3092462694","https://openalex.org/W3096609285","https://openalex.org/W3098232790","https://openalex.org/W3105232955","https://openalex.org/W3159619744","https://openalex.org/W3162694035","https://openalex.org/W3166712493","https://openalex.org/W3171516518","https://openalex.org/W3175082063","https://openalex.org/W3175402857","https://openalex.org/W3204090293","https://openalex.org/W3211953751","https://openalex.org/W3215899623","https://openalex.org/W4221166385","https://openalex.org/W4226024706","https://openalex.org/W4284693480","https://openalex.org/W4285146136","https://openalex.org/W4292433237","https://openalex.org/W4294691145","https://openalex.org/W4295046616","https://openalex.org/W4304086137","https://openalex.org/W4312341368","https://openalex.org/W4322706621","https://openalex.org/W4327852044","https://openalex.org/W4376481269","https://openalex.org/W4382462317","https://openalex.org/W4385574085","https://openalex.org/W4390871882","https://openalex.org/W6766673545","https://openalex.org/W6766904570","https://openalex.org/W6767211374","https://openalex.org/W6784094891","https://openalex.org/W6811230874","https://openalex.org/W6842959286"],"related_works":["https://openalex.org/W2033914206","https://openalex.org/W2042327336","https://openalex.org/W4386159726","https://openalex.org/W2601157893","https://openalex.org/W2131735617","https://openalex.org/W2373006798","https://openalex.org/W2056912418","https://openalex.org/W2123759770","https://openalex.org/W2033213769","https://openalex.org/W2811390910"],"abstract_inverted_index":{"Spatio-temporal":[0],"video":[1,119],"grounding":[2],"(STVG)":[3],"aims":[4],"to":[5,20,101,114,141,169,181],"localize":[6],"a":[7,21,71,88,98,109,137,163],"spatio-temporal":[8],"tube,":[9],"including":[10],"temporal":[11],"boundaries":[12],"and":[13,44,67,120,150,177],"object":[14],"bounding":[15],"boxes,":[16],"that":[17,194],"semantically":[18],"corresponds":[19],"given":[22],"language":[23],"description":[24],"in":[25,33,51,64,74],"an":[26,84],"untrimmed":[27],"video.":[28],"The":[29],"existing":[30],"onestage":[31],"solutions":[32],"this":[34],"task":[35],"face":[36],"two":[37,54,202],"significant":[38],"challenges,":[39],"namely,":[40],"vision-text":[41],"semantic":[42],"misalignment":[43],"spatial":[45,76,189],"mislocalization,":[46],"which":[47],"limit":[48],"their":[49],"performance":[50],"grounding.":[52],"These":[53],"limitations":[55],"are":[56],"mainly":[57],"caused":[58],"by":[59,146,185],"neglect":[60],"of":[61,126,158],"fine-grained":[62],"alignment":[63,117],"crossmodality":[65],"fusion":[66,132],"the":[68,103,124,130,156,171,174,178,182,187,198],"reliance":[69],"on":[70,97,201],"text-agnostic":[72],"query":[73,148,176],"sequentially":[75],"localization.":[77],"To":[78,154],"address":[79,102],"these":[80],"issues,":[81],"we":[82,135,161],"propose":[83],"effective":[85],"model":[86],"with":[87],"newly":[89],"designed":[90],"Feature":[91],"Semantic":[92],"Matching":[93],"(FSM)":[94],"module":[95,113,140],"based":[96],"Transformer":[99],"architecture":[100],"above":[104],"issues.":[105],"Our":[106],"method":[107,196],"introduces":[108],"crossmodal":[110],"feature":[111,131],"matching":[112,139],"achieve":[115],"multi-granularity":[116],"between":[118,173],"text":[121,183],"while":[122],"preventing":[123],"weakening":[125],"important":[127],"features":[128],"during":[129],"stage.":[133],"Additionally,":[134],"design":[136],"query-modulated":[138],"facilitate":[142],"text-relevant":[143],"tube":[144,159],"construction":[145],"multiple":[147],"generation":[149],"tubulet":[151],"sequence":[152],"matching.":[153],"ensure":[155],"quality":[157],"construction,":[160],"employ":[162],"novel":[164],"mismatching":[165,172],"rectify":[166,170],"contrastive":[167],"loss":[168],"learnable":[175],"objects":[179],"corresponding":[180],"descriptions":[184],"restricting":[186],"generated":[188],"query.":[190],"Extensive":[191],"experiments":[192],"demonstrate":[193],"our":[195],"outperforms":[197],"state-of-the-art":[199],"methods":[200],"challenging":[203],"STVG":[204],"benchmarks.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
