{"id":"https://openalex.org/W4410949579","doi":"https://doi.org/10.1109/tcsvt.2025.3575957","title":"Fine-Grained Alignment and Interaction for Video Grounding With Cross-Modal Semantic Hierarchical Graph","display_name":"Fine-Grained Alignment and Interaction for Video Grounding With Cross-Modal Semantic Hierarchical Graph","publication_year":2025,"publication_date":"2025-06-02","ids":{"openalex":"https://openalex.org/W4410949579","doi":"https://doi.org/10.1109/tcsvt.2025.3575957"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3575957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3575957","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100638440","display_name":"Ran Ran","orcid":"https://orcid.org/0000-0001-9350-1389"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ran Ran","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0001-9350-1389","affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060342584","display_name":"Jiwei Wei","orcid":"https://orcid.org/0000-0003-3912-1742"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiwei Wei","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0003-3912-1742","affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114546914","display_name":"Shiyuan He","orcid":"https://orcid.org/0009-0005-6287-6969"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyuan He","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0009-0005-6287-6969","affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018877295","display_name":"Yuyang Zhou","orcid":"https://orcid.org/0000-0002-2188-2781"},"institutions":[{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuyang Zhou","raw_affiliation_strings":["Institute of Electronic and Information Engineering, Hainan University, Haikou, China","Hainan University, Haikou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Electronic and Information Engineering, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]},{"raw_affiliation_string":"Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396039","display_name":"Peng Wang","orcid":"https://orcid.org/0000-0002-5397-9115"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Wang","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100397616","display_name":"Yang Yang","orcid":"https://orcid.org/0000-0002-5070-4511"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yang","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0002-5070-4511","affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052993469","display_name":"Heng Tao Shen","orcid":"https://orcid.org/0000-0002-2999-2088"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Tao Shen","raw_affiliation_strings":["Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0002-2999-2088","affiliations":[{"raw_affiliation_string":"Center for Future Media and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100638440"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.78410114,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"35","issue":"11","first_page":"11641","last_page":"11654"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7252573370933533},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6722942590713501},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4700435698032379},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43285268545150757},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3778197467327118},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.34649401903152466}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7252573370933533},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6722942590713501},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4700435698032379},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43285268545150757},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3778197467327118},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34649401903152466},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3575957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3575957","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1294623083","display_name":null,"funder_award_id":"62220106008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3352021476","display_name":null,"funder_award_id":"2024NSFSC1463","funder_id":"https://openalex.org/F4320333335","funder_display_name":"Sichuan Province Science and Technology Support Program"},{"id":"https://openalex.org/G591136627","display_name":null,"funder_award_id":"62306067","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8037507267","display_name":null,"funder_award_id":"2025A1515010108","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320333335","display_name":"Sichuan Province Science and Technology Support Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1927052826","https://openalex.org/W2111078031","https://openalex.org/W2337252826","https://openalex.org/W2890502146","https://openalex.org/W2896457183","https://openalex.org/W2904824998","https://openalex.org/W2962964995","https://openalex.org/W2963017553","https://openalex.org/W2963916161","https://openalex.org/W2964089981","https://openalex.org/W2970898753","https://openalex.org/W2997429269","https://openalex.org/W2998495542","https://openalex.org/W3013853001","https://openalex.org/W3025323587","https://openalex.org/W3034743747","https://openalex.org/W3035339529","https://openalex.org/W3035356601","https://openalex.org/W3035640828","https://openalex.org/W3092739351","https://openalex.org/W3095206982","https://openalex.org/W3109028199","https://openalex.org/W3115162161","https://openalex.org/W3124671614","https://openalex.org/W3141907748","https://openalex.org/W3152619510","https://openalex.org/W3154682722","https://openalex.org/W3166712493","https://openalex.org/W3174490084","https://openalex.org/W3175082063","https://openalex.org/W3176471072","https://openalex.org/W3180476551","https://openalex.org/W3198384869","https://openalex.org/W3199096350","https://openalex.org/W3211772574","https://openalex.org/W4200631219","https://openalex.org/W4214582399","https://openalex.org/W4214931087","https://openalex.org/W4221148502","https://openalex.org/W4221150632","https://openalex.org/W4221154629","https://openalex.org/W4284696747","https://openalex.org/W4304086137","https://openalex.org/W4312467626","https://openalex.org/W4320713023","https://openalex.org/W4322576838","https://openalex.org/W4352977266","https://openalex.org/W4382240078","https://openalex.org/W4383112706","https://openalex.org/W4385757441","https://openalex.org/W4386066129","https://openalex.org/W4386076371","https://openalex.org/W4386076698","https://openalex.org/W4386231783","https://openalex.org/W4388676567","https://openalex.org/W4390874127","https://openalex.org/W4391941517","https://openalex.org/W4400679250","https://openalex.org/W4402125038","https://openalex.org/W4404469286"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0],"grounding":[1],"tasks":[2],"have":[3],"recently":[4],"gained":[5],"significant":[6],"attention.":[7],"However,":[8],"existing":[9],"methods":[10],"failed":[11],"to":[12,34,45,61,71,97,119],"fully":[13,120],"comprehend":[14],"the":[15,26,36,79,104,122,126,139,169,201],"semantics":[16,113,162],"within":[17,125],"queries":[18],"and":[19,32,42,64,81,90,114,142,160,179,197],"videos,":[20],"often":[21],"overlooking":[22],"key":[23],"content.":[24],"Moreover,":[25,166],"lack":[27],"of":[28,39,111,177],"fine-grained":[29,73,115,158],"cross-modal":[30,67,116,127,148],"alignment":[31],"interaction":[33,173],"guide":[35],"semantic":[37,68,74,128],"matching":[38,150],"complex":[40],"texts":[41],"videos":[43],"lead":[44],"inconsistent":[46],"representational":[47],"modeling.":[48],"To":[49],"address":[50],"this":[51],"issue,":[52],"we":[53,131,167],"propose":[54],"a":[55,66,108],"Semantic":[56],"Hierarchical":[57],"Grounding":[58],"model,":[59],"referred":[60],"as":[62],"SHG,":[63],"design":[65],"hierarchical":[69,129,149,171],"graph":[70,172],"achieve":[72,146],"understanding.":[75],"SHG":[76],"decomposes":[77],"both":[78],"query":[80],"each":[82],"video":[83,185],"moment":[84],"into":[85],"three":[86,191],"levels:":[87],"global,":[88],"action,":[89],"element.":[91],"This":[92,154],"topology,":[93],"ranging":[94],"from":[95],"global":[96,152],"local,":[98],"establishes":[99],"multigranularity":[100],"intrinsic":[101],"connections":[102],"between":[103],"two":[105],"modalities,":[106],"fostering":[107],"comprehensive":[109],"understanding":[110],"dynamic":[112],"matching.":[117],"Accordingly,":[118],"leverage":[121],"rich":[123],"information":[124],"graph,":[130],"employ":[132],"contrastive":[133],"learning":[134],"by":[135],"seeking":[136],"samples":[137],"with":[138],"same":[140],"action":[141],"element":[143],"semantics,":[144],"then":[145],"node-moment":[147],"for":[151,174],"alignment.":[153],"approach":[155,203],"can":[156],"unearth":[157],"clues":[159],"align":[161],"across":[163],"multiple":[164],"granularities.":[165],"combine":[168],"designed":[170],"coarse-to-fine":[175],"fusion":[176],"text":[178],"video,":[180],"thereby":[181],"enabling":[182],"highly":[183],"accurate":[184],"grounding.":[186],"Extensive":[187],"experiments":[188],"conducted":[189],"on":[190],"challenging":[192],"public":[193],"datasets":[194],"(ActivityNet-Captions,":[195],"TACoS,":[196],"Charades-STA)":[198],"demonstrate":[199],"that":[200],"proposed":[202],"outperforms":[204],"state-of-the-art":[205],"techniques,":[206],"validating":[207],"its":[208],"effectiveness.":[209]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
