{"id":"https://openalex.org/W2751525844","doi":"https://doi.org/10.1109/tip.2017.2746267","title":"Unifying the Video and Question Attentions for Open-Ended Video Question Answering","display_name":"Unifying the Video and Question Attentions for Open-Ended Video Question Answering","publication_year":2017,"publication_date":"2017-08-28","ids":{"openalex":"https://openalex.org/W2751525844","doi":"https://doi.org/10.1109/tip.2017.2746267","mag":"2751525844","pmid":"https://pubmed.ncbi.nlm.nih.gov/28866494"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2017.2746267","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2017.2746267","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088930335","display_name":"Hongyang Xue","orcid":"https://orcid.org/0000-0003-3161-3566"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyang Xue","raw_affiliation_strings":["State Key Laboratory of CAD&CG, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CAD&CG, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079260216","display_name":"Zhou Zhao","orcid":"https://orcid.org/0000-0001-6121-0384"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037942269","display_name":"Deng Cai","orcid":"https://orcid.org/0000-0001-9817-4065"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deng Cai","raw_affiliation_strings":["State Key Laboratory of CAD&CG, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CAD&CG, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088930335"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":2.6766,"has_fulltext":false,"cited_by_count":65,"citation_normalized_percentile":{"value":0.94382602,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"26","issue":"12","first_page":"5656","last_page":"5666"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8620138168334961},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8497627973556519},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6020519733428955},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5974797010421753},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4985630512237549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4881032407283783},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.41966211795806885},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4137924313545227}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8620138168334961},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8497627973556519},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6020519733428955},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5974797010421753},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4985630512237549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4881032407283783},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.41966211795806885},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4137924313545227},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2017.2746267","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2017.2746267","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:28866494","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28866494","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5616582905","display_name":null,"funder_award_id":"2013CB336500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1522734439","https://openalex.org/W1531374185","https://openalex.org/W1575833922","https://openalex.org/W1586939924","https://openalex.org/W1614298861","https://openalex.org/W1686810756","https://openalex.org/W1895577753","https://openalex.org/W1895641373","https://openalex.org/W1933349210","https://openalex.org/W1983927101","https://openalex.org/W2058556535","https://openalex.org/W2064675550","https://openalex.org/W2090048052","https://openalex.org/W2130942839","https://openalex.org/W2136480620","https://openalex.org/W2139501017","https://openalex.org/W2151498684","https://openalex.org/W2174492417","https://openalex.org/W2189070436","https://openalex.org/W2202226326","https://openalex.org/W2255577267","https://openalex.org/W2288052783","https://openalex.org/W2530794863","https://openalex.org/W2536013558","https://openalex.org/W2606982687","https://openalex.org/W2886194558","https://openalex.org/W2962749469","https://openalex.org/W2963293463","https://openalex.org/W2963656855","https://openalex.org/W2963672682","https://openalex.org/W2963890755","https://openalex.org/W2963954913","https://openalex.org/W2964121744","https://openalex.org/W2964138017","https://openalex.org/W3023993913","https://openalex.org/W4249179470","https://openalex.org/W4285719527","https://openalex.org/W4298033729","https://openalex.org/W4298392976","https://openalex.org/W6631190155","https://openalex.org/W6631875545","https://openalex.org/W6634232107","https://openalex.org/W6636501129","https://openalex.org/W6636510571","https://openalex.org/W6679436768","https://openalex.org/W6682086655","https://openalex.org/W6686883664","https://openalex.org/W6730666313","https://openalex.org/W6736741030","https://openalex.org/W6753821884"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W4386392971","https://openalex.org/W2027108423","https://openalex.org/W1855666948","https://openalex.org/W2758561209","https://openalex.org/W1548095260","https://openalex.org/W2781711915","https://openalex.org/W2112817590","https://openalex.org/W1555291398"],"abstract_inverted_index":{"Video":[0],"question":[1,17,61,89,101,141],"answering":[2,18],"is":[3,28],"an":[4],"important":[5],"task":[6,56],"toward":[7],"scene":[8],"understanding":[9],"and":[10,33,99,113,119,139],"visual":[11,16,35],"data":[12,36,81,165],"retrieval.":[13],"However,":[14],"current":[15],"works":[19],"mainly":[20],"focus":[21],"on":[22,111,162],"a":[23,80,153],"single":[24],"static":[25],"image,":[26],"which":[27],"distinct":[29],"from":[30,149],"the":[31,38,45,55,71,87,108,117,123,131,137,140,144,156,163,171],"dynamic":[32],"sequential":[34,96,118],"in":[37,48,75],"real":[39],"world.":[40],"Their":[41],"approaches":[42],"cannot":[43],"utilize":[44],"temporal":[46,100,120],"information":[47],"videos.":[49],"In":[50,155],"this":[51],"paper,":[52],"we":[53,93,158],"introduce":[54],"of":[57,122,133,173],"free-form":[58],"open-ended":[59,64,84],"video":[60,97,138],"answering.":[62],"The":[63,125,167],"answers":[65,145],"enable":[66],"wider":[67],"applications":[68],"compared":[69],"with":[70,86],"common":[72],"multiple-choice":[73],"tasks":[74],"Visual-QA.":[76],"We":[77],"first":[78],"propose":[79,94],"set":[82],"for":[83],"Video-QA":[85],"automatic":[88],"generation":[90],"approaches.":[91],"Then,":[92],"our":[95,150,160,174],"attention":[98,102,109],"models.":[103],"These":[104],"two":[105,126],"models":[106,127,151,161],"apply":[107],"mechanism":[110],"videos":[112],"questions,":[114],"while":[115],"preserving":[116],"structures":[121],"guides.":[124],"are":[128,142,146],"integrated":[129],"into":[130],"model":[132],"unified":[134],"attention.":[135],"After":[136],"encoded,":[143],"generated":[147],"wordwisely":[148],"by":[152],"decoder.":[154],"end,":[157],"evaluate":[159],"proposed":[164,175],"set.":[166],"experimental":[168],"results":[169],"demonstrate":[170],"effectiveness":[172],"model.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":6}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
