{"id":"https://openalex.org/W4415709714","doi":"https://doi.org/10.1109/icme59968.2025.11209120","title":"DF-Net: A Dual Fusion Network for Accurate Video Temporal Grounding","display_name":"DF-Net: A Dual Fusion Network for Accurate Video Temporal Grounding","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415709714","doi":"https://doi.org/10.1109/icme59968.2025.11209120"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112974351","display_name":"Haolong Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haolong Yan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050489954","display_name":"Binghao Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Binghao Tang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079515653","display_name":"Boda Lin","orcid":"https://orcid.org/0000-0002-6627-7200"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Boda Lin","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024716590","display_name":"Jiachen Li","orcid":"https://orcid.org/0000-0002-3543-6088"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiachen Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100391321","display_name":"Si Li","orcid":"https://orcid.org/0000-0002-4627-5363"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Si Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2673567,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7386000156402588,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7386000156402588,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.07530000060796738,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.07240000367164612,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6122000217437744},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.48890000581741333},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4697999954223633},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4641999900341034},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.41760000586509705},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.4117000102996826},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.4047999978065491},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.39070001244544983},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.34540000557899475}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7537999749183655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.626800000667572},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6122000217437744},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5551999807357788},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.48890000581741333},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4697999954223633},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4641999900341034},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.39070001244544983},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.34540000557899475},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3231000006198883},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3172999918460846},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.30660000443458557},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C158251709","wikidata":"https://www.wikidata.org/wiki/Q354025","display_name":"Intrusion","level":2,"score":0.2711000144481659}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2111078031","https://openalex.org/W2337252826","https://openalex.org/W2963916161","https://openalex.org/W2964089981","https://openalex.org/W2990763144","https://openalex.org/W2997429269","https://openalex.org/W2998495542","https://openalex.org/W3035640828","https://openalex.org/W3174364033","https://openalex.org/W3175082063","https://openalex.org/W3176201273","https://openalex.org/W3176763654","https://openalex.org/W3178087530","https://openalex.org/W3180476551","https://openalex.org/W3199096350","https://openalex.org/W3200255008","https://openalex.org/W3200924860","https://openalex.org/W3209586588","https://openalex.org/W3211953751","https://openalex.org/W3216763528","https://openalex.org/W4221154629","https://openalex.org/W4385573631","https://openalex.org/W4390874127","https://openalex.org/W4393148542","https://openalex.org/W4402727272","https://openalex.org/W4402753741","https://openalex.org/W4402754238","https://openalex.org/W4403791359","https://openalex.org/W4411244918","https://openalex.org/W4413147815"],"related_works":[],"abstract_inverted_index":{"Video":[0],"Temporal":[1],"Grounding":[2],"(VTG)":[3],"involves":[4],"locating":[5],"the":[6,53,59,83,96],"start":[7],"and":[8,30,56,67,73,79,81,100,112,119],"end":[9],"times":[10],"of":[11],"a":[12,17,41,63,91,102],"video":[13,121],"clip":[14],"based":[15],"on":[16,117],"given":[18],"textual":[19],"query.":[20],"Existing":[21],"methods":[22],"face":[23],"challenges":[24],"including":[25],"temporal":[26],"boundary":[27],"localization":[28],"bias":[29],"class":[31],"imbalance":[32],"in":[33,46,52],"points":[34],"classification.":[35],"To":[36],"alleviate":[37],"these,":[38],"we":[39,61,89],"propose":[40,101],"Dual":[42],"Fusion":[43],"Network":[44],"(DF-Net)":[45],"which":[47],"cross-modal":[48,98],"fusion":[49,99],"processes":[50],"occur":[51],"joint":[54],"encoder":[55],"decoder.":[57],"In":[58,87],"encoder,":[60],"design":[62,95],"multi-step":[64],"sampling":[65],"strategy":[66],"knowledge":[68],"aggregation":[69],"module":[70],"to":[71,94],"extract":[72],"aggregate":[74],"features":[75],"from":[76],"central":[77],"frames":[78],"neighbors":[80],"undergo":[82],"first":[84],"cross-model":[85],"fusion.":[86],"decoding,":[88],"employ":[90],"transformer":[92],"decoder":[93],"second":[97],"novel":[103],"Kullback-Leibler":[104],"(KL)":[105],"divergence-based":[106],"objective":[107],"function":[108],"combined":[109],"with":[110],"focal":[111],"distance-based":[113],"regression":[114],"losses.":[115],"Experiments":[116],"short":[118],"long":[120],"benchmarks":[122],"demonstrate":[123],"significant":[124],"performance":[125],"gains.":[126]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-30T00:00:00"}
