{"id":"https://openalex.org/W4403332929","doi":"https://doi.org/10.1145/3686490.3686507","title":"Video question answering algorithm that integrates video-level and clip-level features by introducing Multi-head attention units","display_name":"Video question answering algorithm that integrates video-level and clip-level features by introducing Multi-head attention units","publication_year":2024,"publication_date":"2024-07-12","ids":{"openalex":"https://openalex.org/W4403332929","doi":"https://doi.org/10.1145/3686490.3686507"},"language":"en","primary_location":{"id":"doi:10.1145/3686490.3686507","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3686490.3686507","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 7th International Conference on Signal Processing and Machine Learning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102109011","display_name":"Ting Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I202126657","display_name":"Qufu Normal University","ror":"https://ror.org/03ceheh96","country_code":"CN","type":"education","lineage":["https://openalex.org/I202126657"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ting Zhao","raw_affiliation_strings":["School of Cyber Science and Engineering, Qufu Normal University, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Qufu Normal University, China","institution_ids":["https://openalex.org/I202126657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009590411","display_name":"Yude Wang","orcid":"https://orcid.org/0000-0002-2458-7817"},"institutions":[{"id":"https://openalex.org/I202126657","display_name":"Qufu Normal University","ror":"https://ror.org/03ceheh96","country_code":"CN","type":"education","lineage":["https://openalex.org/I202126657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yude Wang","raw_affiliation_strings":["School of Cyber Science and Engineering, Qufu Normal University, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Qufu Normal University, China","institution_ids":["https://openalex.org/I202126657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101668359","display_name":"Fei Song","orcid":null},"institutions":[{"id":"https://openalex.org/I202126657","display_name":"Qufu Normal University","ror":"https://ror.org/03ceheh96","country_code":"CN","type":"education","lineage":["https://openalex.org/I202126657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Song","raw_affiliation_strings":["School of Cyber Science and Engineering, Qufu Normal University, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Qufu Normal University, China","institution_ids":["https://openalex.org/I202126657"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101384371","display_name":"Teng Liu","orcid":"https://orcid.org/0009-0004-3203-9804"},"institutions":[{"id":"https://openalex.org/I202126657","display_name":"Qufu Normal University","ror":"https://ror.org/03ceheh96","country_code":"CN","type":"education","lineage":["https://openalex.org/I202126657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Teng Liu","raw_affiliation_strings":["School of Cyber Science and Engineering, Qufu Normal University, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Qufu Normal University, China","institution_ids":["https://openalex.org/I202126657"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102109011"],"corresponding_institution_ids":["https://openalex.org/I202126657"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17975387,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"111","last_page":"119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7692925930023193},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.6188913583755493},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5382774472236633},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5114766359329224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48429885506629944},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3286205530166626},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.320790559053421}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7692925930023193},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.6188913583755493},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5382774472236633},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5114766359329224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48429885506629944},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3286205530166626},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.320790559053421},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3686490.3686507","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3686490.3686507","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 7th International Conference on Signal Processing and Machine Learning","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1981276685","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2549139847","https://openalex.org/W2954199749","https://openalex.org/W2962949233","https://openalex.org/W3010593057","https://openalex.org/W3034730770","https://openalex.org/W3035218869","https://openalex.org/W3035419698","https://openalex.org/W3049310425","https://openalex.org/W3167092180","https://openalex.org/W3181758331","https://openalex.org/W3194340077","https://openalex.org/W3197457832","https://openalex.org/W6600103761","https://openalex.org/W6601674988","https://openalex.org/W6602740540","https://openalex.org/W6603612406","https://openalex.org/W6609871788","https://openalex.org/W6697328973"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2051487156","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2073681303","https://openalex.org/W2349927912"],"abstract_inverted_index":{"In":[0],"order":[1],"to":[2,39,64,86,92],"solve":[3],"the":[4,43,50,68,74,80,87,90,94,100,107,119,129,140,148,164,172,179,182,189,197],"problems":[5],"of":[6,8,58,73,79,99,112,181,199],"lack":[7],"video-level":[9],"and":[10,14,41,47,53,66,76,97,115,122,125,136,150,155,160,185,188,202],"clip-level":[11,84],"feature":[12,17],"fusion":[13],"insufficient":[15],"multi-modal":[16],"processing":[18],"in":[19,106,118,128,147,174],"existing":[20],"video":[21,30,54,75,183,200],"question":[22,31,124,184,201],"answering":[23,32],"algorithms,":[24],"this":[25,175],"paper":[26,108,176],"proposes":[27],"a":[28],"multi-level":[29],"algorithm":[33,173],"that":[34,171],"uses":[35],"multi-head":[36,59],"attention":[37,60],"units":[38,61],"process":[40,65],"fuse":[42,67],"relationship":[44],"between":[45],"videos":[46],"questions.":[48],"At":[49],"clip":[51],"level":[52],"level,":[55],"two":[56],"layers":[57],"are":[62,153,158],"used":[63],"appearance":[69],"features,":[70],"motion":[71],"features":[72,78,85],"text":[77],"question,":[81],"integrate":[82],"processed":[83],"video-level,":[88],"allowing":[89],"model":[91,142,166],"capture":[93],"global":[95],"context":[96],"structure":[98],"entire":[101],"video.":[102],"The":[103,144],"method":[104],"proposed":[105],"achieved":[109],"accuracy":[110,145,180],"rates":[111,146],"76.42%,":[113],"82.61%":[114],"57.94%":[116],"respectively":[117],"action,":[120],"transition":[121],"frame":[123],"answer":[126,186,203],"tasks":[127],"TGIF-QA":[130],"dataset,":[131],"which":[132,157],"were":[133],"1.42%,":[134],"1.21%":[135],"2.04%":[137],"higher":[138,162],"than":[139,163],"baseline":[141,165],"respectively.":[143,167],"MSRVTT-QA":[149],"MSVD-QA":[151],"datasets":[152],"36.13%":[154],"37.32%,":[156],"1.22%":[159],"0.53%":[161],"Experimental":[168],"results":[169],"show":[170],"effectively":[177],"improves":[178],"algorithm,":[187],"research":[190],"work":[191],"can":[192],"provide":[193],"theoretical":[194],"support":[195],"for":[196],"implementation":[198],"technology.":[204]},"counts_by_year":[],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
