{"id":"https://openalex.org/W7125917918","doi":"https://doi.org/10.1109/smc58881.2025.11342816","title":"FIQ: Fundamental Question Generation with the Integration of Question Embeddings for Video Question Answering","display_name":"FIQ: Fundamental Question Generation with the Integration of Question Embeddings for Video Question Answering","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125917918","doi":"https://doi.org/10.1109/smc58881.2025.11342816"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342816","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342816","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124078737","display_name":"Juyoung Oh","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Juyoung Oh","raw_affiliation_strings":["Korea University,Department of Artificial Intelligence,Seoul,Korea,02841"],"affiliations":[{"raw_affiliation_string":"Korea University,Department of Artificial Intelligence,Seoul,Korea,02841","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010759442","display_name":"Ho-Joong Kim","orcid":"https://orcid.org/0000-0003-4200-5136"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ho-Joong Kim","raw_affiliation_strings":["Korea University,Department of Artificial Intelligence,Seoul,Korea,02841"],"affiliations":[{"raw_affiliation_string":"Korea University,Department of Artificial Intelligence,Seoul,Korea,02841","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124139481","display_name":"Seong-Whan Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seong-Whan Lee","raw_affiliation_strings":["Korea University,Department of Artificial Intelligence,Seoul,Korea,02841"],"affiliations":[{"raw_affiliation_string":"Korea University,Department of Artificial Intelligence,Seoul,Korea,02841","institution_ids":["https://openalex.org/I197347611"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124078737"],"corresponding_institution_ids":["https://openalex.org/I197347611"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.7273433,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"972","last_page":"977"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0017000000225380063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8342999815940857},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6241999864578247},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5860000252723694},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5557000041007996},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5454000234603882},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5382000207901001},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5311999917030334},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.48969998955726624}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8342999815940857},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019000291824341},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6241999864578247},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5860000252723694},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5687999725341797},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5557000041007996},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5454000234603882},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5382000207901001},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5311999917030334},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4438999891281128},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.38499999046325684},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29760000109672546},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2856999933719635},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2680000066757202},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2542000114917755},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342816","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342816","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2011013898","https://openalex.org/W2031825886","https://openalex.org/W2083677758","https://openalex.org/W2104558951","https://openalex.org/W2129169401","https://openalex.org/W2990503944","https://openalex.org/W3034188538","https://openalex.org/W3198377975","https://openalex.org/W4287887134","https://openalex.org/W4312651322","https://openalex.org/W4385565374","https://openalex.org/W4386065596","https://openalex.org/W4386072441","https://openalex.org/W4386790226","https://openalex.org/W4390873290","https://openalex.org/W4393147072","https://openalex.org/W4401043036","https://openalex.org/W4402727142","https://openalex.org/W4402816833","https://openalex.org/W4403906532","https://openalex.org/W4404770348","https://openalex.org/W4404783518","https://openalex.org/W4404784276"],"related_works":[],"abstract_inverted_index":{"Video":[0],"question":[1,25,98,104,108,175],"answering":[2,109],"(VQA)":[3],"is":[4,45,210],"a":[5,13,17,76,96,111,169],"multimodal":[6],"task":[7],"that":[8,172,181,198],"requires":[9],"the":[10,32,50,71,83,101,117,121,125,141,152,156,189],"interpretation":[11],"of":[12,35,53,58,103,120,128,191],"video":[14,36,107],"to":[15,30,48,73,115,154,160,187,205],"answer":[16,27],"given":[18],"question.":[19],"Existing":[20],"VQA":[21],"methods":[22],"primarily":[23],"utilize":[24],"and":[26,67,88,163],"(Q&A)":[28],"pairs":[29,133,150],"learn":[31],"spatio-temporal":[33],"characteristics":[34],"content.":[37],"However,":[38],"these":[39],"annotations":[40],"are":[41,185],"typically":[42],"event-centric,":[43],"which":[44],"not":[46],"enough":[47],"capture":[49],"broader":[51],"context":[52],"each":[54],"video.":[55],"The":[56],"absence":[57],"essential":[59,182],"details":[60,184],"such":[61],"as":[62],"object":[63],"types,":[64],"spatial":[65],"layouts,":[66],"descriptive":[68],"attributes":[69],"restricts":[70],"model":[72,122,153],"learning":[74],"only":[75],"fragmented":[77],"scene":[78,146],"representation.":[79],"This":[80],"issue":[81],"limits":[82],"model\u2019s":[84],"capacity":[85],"for":[86,106],"generalization":[87],"higher-level":[89],"reasoning.":[90],"In":[91],"this":[92],"paper,":[93],"we":[94,167],"propose":[95],"fundamental":[97,126,145],"generation":[99],"with":[100,144,177],"integration":[102],"embeddings":[105,176],"(FIQ),":[110],"novel":[112],"approach":[113],"designed":[114],"strengthen":[116],"reasoning":[118,164],"ability":[119],"by":[123],"enhancing":[124],"understanding":[127],"videos.":[129],"FIQ":[130,200],"generates":[131],"Q&A":[132,149],"based":[134],"on":[135,195],"descriptions":[136],"extracted":[137],"from":[138],"videos,":[139],"enriching":[140],"training":[142],"data":[143],"information.":[147],"Generated":[148],"enable":[151],"understand":[155],"primary":[157],"context,":[158],"leading":[159],"enhanced":[161],"generalizability":[162],"ability.":[165],"Furthermore,":[166],"incorporate":[168],"VQ-CAlign":[170],"module":[171],"assists":[173],"task-specific":[174],"visual":[178],"features,":[179],"ensuring":[180],"domain-specific":[183],"preserved":[186],"increase":[188],"adaptability":[190],"downstream":[192],"tasks.":[193],"Experiments":[194],"SUTD-TrafficQA":[196],"demonstrate":[197],"our":[199],"achieves":[201],"state-of-the-art":[202],"performance":[203],"compared":[204],"existing":[206],"baseline":[207],"methods.":[208],"Code":[209],"available":[211],"at":[212],"https://github.com/juyoungohjulie/FIQ":[213]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-29T00:00:00"}
