{"id":"https://openalex.org/W4386043768","doi":"https://doi.org/10.48550/arxiv.2308.09363","title":"Open-vocabulary Video Question Answering: A New Benchmark for Evaluating the Generalizability of Video Question Answering Models","display_name":"Open-vocabulary Video Question Answering: A New Benchmark for Evaluating the Generalizability of Video Question Answering Models","publication_year":2023,"publication_date":"2023-08-18","ids":{"openalex":"https://openalex.org/W4386043768","doi":"https://doi.org/10.48550/arxiv.2308.09363"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2308.09363","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09363","pdf_url":"https://arxiv.org/pdf/2308.09363","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2308.09363","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008798023","display_name":"Dohwan Ko","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ko, Dohwan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101482853","display_name":"Ji Soo Lee","orcid":"https://orcid.org/0009-0003-9055-5236"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Ji Soo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111027793","display_name":"Miso Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Miso","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079985802","display_name":"Jaewon Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Jaewon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100740584","display_name":"Jihwan Park","orcid":"https://orcid.org/0009-0007-2223-7278"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jihwan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084814930","display_name":"Hyunwoo J. Kim","orcid":"https://orcid.org/0000-0002-2181-9264"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hyunwoo J.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008798023"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.905457615852356},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7972727417945862},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7927336096763611},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7751063108444214},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6930629014968872},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6736338138580322},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6204836964607239},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5434503555297852},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5341187119483948},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4720234274864197},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41288381814956665},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39329856634140015},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1346403956413269},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12255197763442993}],"concepts":[{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.905457615852356},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7972727417945862},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7927336096763611},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7751063108444214},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6930629014968872},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6736338138580322},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6204836964607239},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5434503555297852},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5341187119483948},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4720234274864197},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41288381814956665},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39329856634140015},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1346403956413269},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12255197763442993},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138496976","wikidata":"https://www.wikidata.org/wiki/Q175002","display_name":"Developmental psychology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2308.09363","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09363","pdf_url":"https://arxiv.org/pdf/2308.09363","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2308.09363","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2308.09363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2308.09363","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09363","pdf_url":"https://arxiv.org/pdf/2308.09363","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1336813462","display_name":null,"funder_award_id":"IITP-2023-2020-0-01819","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G4700831490","display_name":null,"funder_award_id":"2022-","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G5932296353","display_name":null,"funder_award_id":"2022-0-01198","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G8018065544","display_name":null,"funder_award_id":"2020-0-01819","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G8285762245","display_name":null,"funder_award_id":"IITP-2023-2020-0-01819","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"},{"id":"https://openalex.org/F4320337144","display_name":"National Supercomputing Center, Korea Institute of Science and Technology Information","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386043768.pdf","grobid_xml":"https://content.openalex.org/works/W4386043768.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2118717649","https://openalex.org/W2413243053","https://openalex.org/W410723623","https://openalex.org/W2015341305","https://openalex.org/W2035068594","https://openalex.org/W4225593417","https://openalex.org/W2573498121","https://openalex.org/W3022298670","https://openalex.org/W3160494304","https://openalex.org/W2167883292"],"abstract_inverted_index":{"Video":[0,99],"Question":[1,100],"Answering":[2,101],"(VideoQA)":[3],"is":[4,32,223],"a":[5,51,60,95,128,209],"challenging":[6],"task":[7,53],"that":[8,133,184,202],"entails":[9],"complex":[10],"multi-modal":[11],"reasoning.":[12],"In":[13,116],"contrast":[14],"to":[15,20,33,54,78,87,103,120],"multiple-choice":[16],"VideoQA":[17,31,45,49,108,162,216],"which":[18,66],"aims":[19],"predict":[21],"the":[22,27,41,56,76,105,122,135,144,158,191,213],"answer":[23,34,62],"given":[24],"several":[25],"options,":[26],"goal":[28],"of":[29,43,107,215],"open-ended":[30,48,161],"questions":[35],"without":[36],"restricting":[37],"candidate":[38],"answers.":[39,91,115,176,199],"However,":[40],"majority":[42],"previous":[44],"models":[46,109,163,217],"formulate":[47],"as":[50,208],"classification":[52],"classify":[55],"video-question":[57],"pairs":[58],"into":[59,171],"fixed":[61],"set,":[63],"i.e.,":[64],"closed-vocabulary,":[65],"contains":[67],"only":[68,82],"frequent":[69,83],"answers":[70,84,141],"(e.g.,":[71],"top-1000":[72],"answers).":[73],"This":[74],"leads":[75],"model":[77,192],"be":[79],"biased":[80],"toward":[81],"and":[85,113,139,164,174,180,197,218],"fail":[86],"generalize":[88],"on":[89,137,195],"out-of-vocabulary":[90],"We":[92,200],"hence":[93],"propose":[94],"new":[96,154],"benchmark,":[97],"Open-vocabulary":[98],"(OVQA),":[102],"measure":[104],"generalizability":[106,214],"by":[110,142,156,168],"considering":[111],"rare":[112,138,173,196],"unseen":[114,140,175,198],"addition,":[117],"in":[118],"order":[119],"improve":[121,165],"model's":[123],"generalization":[124],"power,":[125],"we":[126,152],"introduce":[127,153],"novel":[129],"GNN-based":[130,186],"soft":[131,187],"verbalizer":[132,188],"enhances":[134],"prediction":[136],"aggregating":[143],"information":[145],"from":[146],"their":[147,166],"similar":[148],"words.":[149],"For":[150],"evaluation,":[151],"baselines":[155],"modifying":[157],"existing":[159],"(closed-vocabulary)":[160],"performances":[167],"further":[169,189],"taking":[170],"account":[172],"Our":[177],"ablation":[178],"studies":[179],"qualitative":[181],"analyses":[182],"demonstrate":[183],"our":[185,203],"improves":[190],"performance,":[193],"especially":[194],"hope":[201],"benchmark":[204],"OVQA":[205],"can":[206],"serve":[207],"guide":[210],"for":[211],"evaluating":[212],"inspire":[219],"future":[220],"research.":[221],"Code":[222],"available":[224],"at":[225],"https://github.com/mlvlab/OVQA.":[226]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-09T15:46:55.921056","created_date":"2023-08-22T00:00:00"}
