{"id":"https://openalex.org/W4399418564","doi":"https://doi.org/10.1145/3652583.3658096","title":"A Parallel Transformer Framework for Video Moment Retrieval","display_name":"A Parallel Transformer Framework for Video Moment Retrieval","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399418564","doi":"https://doi.org/10.1145/3652583.3658096"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658096","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658096","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658096","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075657675","display_name":"Thao-Nhu Nguyen","orcid":"https://orcid.org/0000-0003-1356-9434"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Thao-Nhu Nguyen","raw_affiliation_strings":["Dublin City University, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0003-1356-9434","affiliations":[{"raw_affiliation_string":"Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016271188","display_name":"Zongyao Li","orcid":"https://orcid.org/0000-0002-3300-1806"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Zongyao Li","raw_affiliation_strings":["NEC Corporation, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-3300-1806","affiliations":[{"raw_affiliation_string":"NEC Corporation, Tokyo, Japan","institution_ids":["https://openalex.org/I118347220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026740218","display_name":"Satoshi Yamazaki","orcid":"https://orcid.org/0000-0002-4673-6924"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yamazaki Satoshi","raw_affiliation_strings":["NEC Corporation, Tokyo, JP"],"raw_orcid":"https://orcid.org/0000-0002-4673-6924","affiliations":[{"raw_affiliation_string":"NEC Corporation, Tokyo, JP","institution_ids":["https://openalex.org/I118347220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079797505","display_name":"Jianquan Liu","orcid":"https://orcid.org/0000-0003-4303-9020"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jianquan Liu","raw_affiliation_strings":["NEC Corporation, Tokyo, JP"],"raw_orcid":"https://orcid.org/0000-0003-4303-9020","affiliations":[{"raw_affiliation_string":"NEC Corporation, Tokyo, JP","institution_ids":["https://openalex.org/I118347220"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014224452","display_name":"Cathal Gurrin","orcid":"https://orcid.org/0000-0003-2903-3968"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Cathal Gurrin","raw_affiliation_strings":["Dublin City University, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0003-2903-3968","affiliations":[{"raw_affiliation_string":"Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075657675"],"corresponding_institution_ids":["https://openalex.org/I42934936"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06958008,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"460","last_page":"468"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7960348129272461},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5664933323860168},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5205432176589966},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.492057740688324},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4299645721912384},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.4247640371322632},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.332187294960022},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.12545251846313477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7960348129272461},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5664933323860168},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5205432176589966},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.492057740688324},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4299645721912384},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.4247640371322632},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.332187294960022},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.12545251846313477},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3652583.3658096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658096","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658096","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:doras.dcu.ie:30558","is_oa":false,"landing_page_url":"https://orcid.org/0000-0003-1356-9434>,","pdf_url":null,"source":{"id":"https://openalex.org/S4306401511","display_name":"Dublin City University Open Access Institutional Repository (Dublin City University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I42934936","host_organization_name":"Dublin City University","host_organization_lineage":["https://openalex.org/I42934936"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658096","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658096","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8414697727","display_name":null,"funder_award_id":"18/CRT/6223","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399418564.pdf"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W2072551889","https://openalex.org/W2077069816","https://openalex.org/W2739107216","https://openalex.org/W2948958195","https://openalex.org/W2963101956","https://openalex.org/W2963524571","https://openalex.org/W2964089981","https://openalex.org/W3034743747","https://openalex.org/W3035392611","https://openalex.org/W3043840704","https://openalex.org/W3101429639","https://openalex.org/W3176471072","https://openalex.org/W3199871897","https://openalex.org/W3216551675","https://openalex.org/W4214582399","https://openalex.org/W4366352717","https://openalex.org/W4386083094","https://openalex.org/W4390871861","https://openalex.org/W6714454088"],"related_works":["https://openalex.org/W4310420093","https://openalex.org/W4213212078","https://openalex.org/W3154635860","https://openalex.org/W4226332880","https://openalex.org/W4298054035","https://openalex.org/W2997877535","https://openalex.org/W4200631625","https://openalex.org/W2379525370","https://openalex.org/W2787695722","https://openalex.org/W2964367370"],"abstract_inverted_index":{"In":[0],"the":[1,20,38,43,50,55,60,82,107,111,114,121,131,135,149,154,158],"realm":[2],"of":[3,22,25,95,101],"video":[4,44],"understanding,":[5],"Video":[6],"Moment":[7],"Retrieval":[8],"(VMR)":[9],"is":[10],"an":[11,75],"important":[12],"yet":[13],"challenging":[14],"task":[15],"that":[16,58,80,153],"aims":[17],"to":[18],"locate":[19],"boundary":[21],"a":[23,28,162],"moment":[24],"interest":[26],"within":[27],"long":[29],"untrimmed":[30],"video.":[31],"Existing":[32],"VMR":[33],"methods":[34,160],"often":[35],"focus":[36],"on":[37,148],"visual":[39,89,125],"content":[40,62],"extracted":[41],"from":[42],"only":[45],"(or":[46],"frame":[47],"sequences),":[48],"however,":[49],"rich":[51],"semantic":[52],"information":[53],"at":[54,169],"object":[56],"level":[57],"describes":[59],"image's":[61],"has":[63],"not":[64],"been":[65],"explored":[66],"yet.":[67],"To":[68],"overcome":[69],"those":[70],"limitations,":[71],"we":[72],"propose":[73],"PaTF,":[74],"attention-based":[76],"<u>Pa</u>rallel":[77],"<u>T</u>ransformer":[78],"<u>F</u>ramework":[79],"enriches":[81],"feature":[83],"representations":[84],"by":[85,161],"exploring":[86],"both":[87],"low-level":[88],"cues":[90],"and":[91,110,127,167,175],"high-level":[92],"relational":[93],"contexts":[94],"video-query":[96],"pairs.":[97],"Our":[98],"framework":[99,156],"consists":[100],"two":[102],"parallel":[103],"transformers:":[104],"one":[105],"for":[106,113],"visual-textual":[108,118],"stream":[109,119,133],"other":[112],"semantic-textual":[115,132],"stream.":[116],"The":[117],"extracts":[120],"links":[122],"between":[123,137],"global":[124],"features":[126],"textual":[128],"information,":[129],"while":[130],"emphasises":[134],"relations":[136],"objects":[138],"via":[139],"scene":[140],"graph":[141],"representations.":[142],"Furthermore,":[143],"our":[144],"comprehensive":[145],"experiment":[146],"conducted":[147],"Charades-STA":[150],"dataset":[151],"demonstrates":[152],"proposed":[155],"outperforms":[157],"state-of-the-art":[159],"large":[163],"margin,":[164],"roughly":[165],"5%":[166],"7%":[168],"Recall@1":[170],"with":[171],"IoU":[172,176],"=":[173,177],"0.5":[174],"0.7,":[178],"respectively.":[179]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
