{"id":"https://openalex.org/W4402982018","doi":"https://doi.org/10.1109/icme57554.2024.10687844","title":"Multi-Modal Fusion and Query Refinement Network for Video Moment Retrieval and Highlight Detection","display_name":"Multi-Modal Fusion and Query Refinement Network for Video Moment Retrieval and Highlight Detection","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402982018","doi":"https://doi.org/10.1109/icme57554.2024.10687844"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2501.10692","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040712361","display_name":"Yifang Xu","orcid":"https://orcid.org/0009-0001-3738-789X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifang Xu","raw_affiliation_strings":["Nanjing University,School of Electronic Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,School of Electronic Science and Engineering,China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021416936","display_name":"Yunzhuo Sun","orcid":"https://orcid.org/0009-0006-8122-7259"},"institutions":[{"id":"https://openalex.org/I4210165606","display_name":"Hubei Normal University","ror":"https://ror.org/056y3dw16","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210165606"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunzhuo Sun","raw_affiliation_strings":["Hubei Normal University,School of Physics and Electronics,China"],"affiliations":[{"raw_affiliation_string":"Hubei Normal University,School of Physics and Electronics,China","institution_ids":["https://openalex.org/I4210165606"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113373662","display_name":"Benxiang Zhai","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Benxiang Zhai","raw_affiliation_strings":["Nanjing University,School of Electronic Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,School of Electronic Science and Engineering,China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104249063","display_name":"Zien Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zien Xie","raw_affiliation_strings":["Nanjing University,School of Electronic Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,School of Electronic Science and Engineering,China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111347031","display_name":"Youyao Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099297","display_name":"Jilian Technology Group (China)","ror":"https://ror.org/016q5ce10","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210099297"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youyao Jia","raw_affiliation_strings":["Gosuncn Chuanglian Technology Co., Ltd.,China"],"affiliations":[{"raw_affiliation_string":"Gosuncn Chuanglian Technology Co., Ltd.,China","institution_ids":["https://openalex.org/I4210099297"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016539834","display_name":"Sidan Du","orcid":"https://orcid.org/0000-0002-9966-3765"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sidan Du","raw_affiliation_strings":["Nanjing University,School of Electronic Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,School of Electronic Science and Engineering,China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5040712361"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":1.0296,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.78100257,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7814281582832336},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.5900506377220154},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5381646156311035},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5328298807144165},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5199912190437317},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4456614851951599},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.4405534267425537},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.42686256766319275}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7814281582832336},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.5900506377220154},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5381646156311035},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5328298807144165},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5199912190437317},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4456614851951599},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.4405534267425537},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.42686256766319275},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icme57554.2024.10687844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2501.10692","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.10692","pdf_url":"https://arxiv.org/pdf/2501.10692","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2501.10692","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.10692","pdf_url":"https://arxiv.org/pdf/2501.10692","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402982018.pdf"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W2102245440","https://openalex.org/W2167855619","https://openalex.org/W2908510526","https://openalex.org/W2919974746","https://openalex.org/W2948920874","https://openalex.org/W2963017553","https://openalex.org/W2964089981","https://openalex.org/W2997429269","https://openalex.org/W3096609285","https://openalex.org/W3107331169","https://openalex.org/W3168910151","https://openalex.org/W3214448253","https://openalex.org/W3215023725","https://openalex.org/W4308234134","https://openalex.org/W4312351586","https://openalex.org/W4312544061","https://openalex.org/W4313469377","https://openalex.org/W4322616103","https://openalex.org/W4322707256","https://openalex.org/W4376312579","https://openalex.org/W4383604991","https://openalex.org/W4385245566","https://openalex.org/W4386076176","https://openalex.org/W4389722225","https://openalex.org/W4402352082"],"related_works":["https://openalex.org/W2560191017","https://openalex.org/W2348892528","https://openalex.org/W2014728371","https://openalex.org/W3194422352","https://openalex.org/W2397627326","https://openalex.org/W1981131819","https://openalex.org/W2132659060","https://openalex.org/W2031992971","https://openalex.org/W3214791684","https://openalex.org/W2152662039"],"abstract_inverted_index":{"Given":[0],"a":[1,4,50,68,90],"video":[2,7],"and":[3,10,43,53,78,103,110,126],"linguistic":[5],"query,":[6],"moment":[8],"retrieval":[9],"highlight":[11],"detection":[12],"(MR&HD)":[13],"aim":[14],"to":[15,58,72,82],"locate":[16],"all":[17],"the":[18,35],"relevant":[19],"spans,":[20],"while":[21],"simultaneously":[22],"predicting":[23],"saliency":[24],"scores.":[25],"Most":[26],"existing":[27],"methods":[28],"utilize":[29],"RGB":[30],"images":[31],"as":[32],"input,":[33],"overlooking":[34],"inherent":[36],"multi-modal":[37,63,69],"visual":[38],"signals":[39],"like":[40],"optical":[41,76],"flow":[42],"depth.":[44],"In":[45],"this":[46],"paper,":[47],"we":[48,66,88],"propose":[49],"Multi-modal":[51],"Fusion":[52],"Query":[54],"Refinement":[55],"Network":[56],"(MRNet)":[57],"learn":[59],"complementary":[60],"information":[61],"from":[62],"cues.":[64],"Specifically,":[65],"design":[67],"fusion":[70],"module":[71,93],"dynamically":[73],"combine":[74],"RGB,":[75],"flow,":[77],"depth":[79],"map.":[80],"Furthermore,":[81],"simulate":[83],"human":[84],"understanding":[85],"of":[86],"sentences,":[87],"introduce":[89],"query":[91],"refinement":[92],"that":[94,114],"merges":[95],"text":[96],"at":[97],"different":[98],"granularities,":[99],"containing":[100],"word-,":[101],"phrase-,":[102],"sentence-wise":[104],"levels.":[105],"Comprehensive":[106],"experiments":[107],"on":[108,129],"QVHighlights":[109],"Charades":[111],"datasets":[112],"indicate":[113],"MRNet":[115],"outperforms":[116],"current":[117],"SOTA":[118],"methods,":[119],"achieving":[120],"notable":[121],"improvements":[122],"in":[123],"MR-mAP@Avg":[124],"(+3.41)":[125],"HD-HIT@1":[127],"(+3.46)":[128],"QVHighlights.":[130]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
