{"id":"https://openalex.org/W4399423018","doi":"https://doi.org/10.1145/3652583.3658052","title":"Improving Interpretable Embeddings for Ad-hoc Video Search with Generative Captions and Multi-word Concept Bank","display_name":"Improving Interpretable Embeddings for Ad-hoc Video Search with Generative Captions and Multi-word Concept Bank","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399423018","doi":"https://doi.org/10.1145/3652583.3658052"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658052","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658052","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658052","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658052","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056908913","display_name":"Jiaxin Wu","orcid":"https://orcid.org/0000-0003-4074-3442"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Jiaxin Wu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010722442","display_name":"Chong\u2010Wah Ngo","orcid":"https://orcid.org/0000-0003-4182-8261"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chong-Wah Ngo","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020936420","display_name":"W. K. Chan","orcid":"https://orcid.org/0000-0001-7726-6235"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wing-Kwong Chan","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056908913"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":0.2569,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.49372957,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"73","last_page":"82"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8748764991760254},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5104271769523621},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4960237443447113},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4927809536457062},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48425909876823425},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4783874750137329},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.47029468417167664},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4657258689403534},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46004369854927063},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.41564589738845825},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2458178699016571}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8748764991760254},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5104271769523621},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4960237443447113},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4927809536457062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48425909876823425},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4783874750137329},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.47029468417167664},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4657258689403534},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46004369854927063},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.41564589738845825},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2458178699016571},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3658052","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658052","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658052","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658052","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658052","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658052","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5016601650","display_name":null,"funder_award_id":"Academic Research Fund","funder_id":"https://openalex.org/F4320320751","funder_display_name":"Ministry of Education - Singapore"},{"id":"https://openalex.org/G6341628774","display_name":null,"funder_award_id":"9678180","funder_id":"https://openalex.org/F4320309893","funder_display_name":"City University of Hong Kong"},{"id":"https://openalex.org/G6854926366","display_name":null,"funder_award_id":"Tier 2","funder_id":"https://openalex.org/F4320320751","funder_display_name":"Ministry of Education - Singapore"},{"id":"https://openalex.org/G901625343","display_name":null,"funder_award_id":"Academic Research F","funder_id":"https://openalex.org/F4320320751","funder_display_name":"Ministry of Education - Singapore"}],"funders":[{"id":"https://openalex.org/F4320309893","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23"},{"id":"https://openalex.org/F4320320751","display_name":"Ministry of Education - Singapore","ror":"https://ror.org/01kcva023"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399423018.pdf","grobid_xml":"https://content.openalex.org/works/W4399423018.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1999970330","https://openalex.org/W2062903088","https://openalex.org/W2081580037","https://openalex.org/W2089150756","https://openalex.org/W2148809503","https://openalex.org/W2185175083","https://openalex.org/W2250384498","https://openalex.org/W2411317681","https://openalex.org/W2425121537","https://openalex.org/W2560674852","https://openalex.org/W2596164567","https://openalex.org/W2616994964","https://openalex.org/W2735355755","https://openalex.org/W2753311918","https://openalex.org/W2761419673","https://openalex.org/W2914911817","https://openalex.org/W2916245046","https://openalex.org/W2950557411","https://openalex.org/W2963293463","https://openalex.org/W2975813532","https://openalex.org/W2981716253","https://openalex.org/W2984008963","https://openalex.org/W2989322838","https://openalex.org/W3033977115","https://openalex.org/W3093002089","https://openalex.org/W3096698434","https://openalex.org/W3102887392","https://openalex.org/W3107973541","https://openalex.org/W3130796238","https://openalex.org/W3169472988","https://openalex.org/W4211053420","https://openalex.org/W4296262844","https://openalex.org/W4312777110","https://openalex.org/W4324157320","https://openalex.org/W4388638745","https://openalex.org/W4390871860"],"related_works":["https://openalex.org/W3125011624","https://openalex.org/W1508631387","https://openalex.org/W2380075625","https://openalex.org/W2074099744","https://openalex.org/W4391150403","https://openalex.org/W2081749821","https://openalex.org/W1968572830","https://openalex.org/W2090814745","https://openalex.org/W2029896371","https://openalex.org/W2136497797"],"abstract_inverted_index":{"Aligning":[0],"a":[1,68,73,80,85,106,119,180],"user":[2],"query":[3,129,173],"and":[4,11,41,56,71,94,165,193],"video":[5,22,95],"clips":[6],"in":[7,50,125],"cross-modal":[8],"latent":[9],"space":[10],"that":[12,146],"with":[13,186],"semantic":[14],"concepts":[15],"are":[16,195],"two":[17,64],"mainstream":[18],"approaches":[19,30],"for":[20,97,175],"ad-hoc":[21],"search":[23],"(AVS).":[24],"However,":[25],"the":[26,34,42,51,57,101,116,134,141,147,150,154,158,162,167,170],"effectiveness":[27],"of":[28,37,45,53,89,118,136,149,157],"existing":[29],"is":[31],"bottlenecked":[32],"by":[33,66,179],"small":[35],"sizes":[36],"available":[38,196],"video-text":[39],"datasets":[40],"low":[43],"quality":[44],"concept":[46,75,108],"banks,":[47],"which":[48],"results":[49,144],"failures":[52],"unseen":[54],"queries":[55],"out-of-vocabulary":[58,102],"problem.":[59],"This":[60],"paper":[61],"addresses":[62],"these":[63],"problems":[65],"constructing":[67],"new":[69,86],"dataset":[70,87,164],"developing":[72],"multi-word":[74,107],"bank.":[76],"Specifically,":[77],"capitalizing":[78],"on":[79,111,140,161,169],"generative":[81],"model,":[82],"we":[83,104],"construct":[84],"consisting":[88],"7":[90],"million":[91],"generated":[92],"text":[93],"pairs":[96],"pre-training.":[98],"To":[99],"tackle":[100],"problem,":[103],"develop":[105],"bank":[109],"based":[110],"syntax":[112],"analysis":[113],"to":[114,184],"enhance":[115],"capability":[117],"state-of-the-":[120],"art":[121],"interpretable":[122],"AVS":[123,159,172],"method":[124,160],"modelling":[126],"relationships":[127],"between":[128],"words.":[130],"We":[131],"also":[132],"study":[133],"impact":[135],"current":[137],"advanced":[138],"features":[139],"method.":[142],"Experimental":[143],"show":[145],"integration":[148],"above-proposed":[151],"elements":[152],"doubles":[153],"R@1":[155],"performance":[156],"MSRVTT":[163],"improves":[166],"xinfAP":[168],"TRECVid":[171],"sets":[174],"2016-2023":[176],"(eight":[177],"years)":[178],"margin":[181],"from":[182],"2%":[183],"77%,":[185],"an":[187],"average":[188],"about":[189],"20%.":[190],"The":[191],"code":[192],"model":[194],"at":[197],"https://github.com/nikkiwoo-gh/Improved-ITV.":[198]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
