{"id":"https://openalex.org/W7125952960","doi":"https://doi.org/10.1109/smc58881.2025.11342922","title":"MGQA: Mixture Gaussian for Video Grounded Question Answering via VLMs","display_name":"MGQA: Mixture Gaussian for Video Grounded Question Answering via VLMs","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125952960","doi":"https://doi.org/10.1109/smc58881.2025.11342922"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342922","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124091123","display_name":"Zhixian He","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhixian He","raw_affiliation_strings":["Sun Yat-sen University,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091598240","display_name":"Xiaofan Ma","orcid":"https://orcid.org/0009-0005-1555-7279"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofan Ma","raw_affiliation_strings":["Sun Yat-sen University,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124149863","display_name":"Qiushi Li","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiushi Li","raw_affiliation_strings":["Sun Yat-sen University,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123515932","display_name":"S Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shujin Lin","raw_affiliation_strings":["Sun Yat-sen University,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5124091123"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71701488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3108","last_page":"3113"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0006000000284984708,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7206000089645386},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6564000248908997},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.597100019454956},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.5802000164985657},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5608000159263611},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4828000068664551},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.48260000348091125},{"id":"https://openalex.org/keywords/gaussian-network-model","display_name":"Gaussian network model","score":0.39800000190734863}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7206000089645386},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6995000243186951},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6564000248908997},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.597100019454956},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.5802000164985657},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5716000199317932},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5608000159263611},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4828000068664551},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.48260000348091125},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4271000027656555},{"id":"https://openalex.org/C166550679","wikidata":"https://www.wikidata.org/wiki/Q263400","display_name":"Gaussian network model","level":3,"score":0.39800000190734863},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3783000111579895},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2962000072002411},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29429998993873596},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2791000008583069},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.26570001244544983},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26159998774528503},{"id":"https://openalex.org/C167928553","wikidata":"https://www.wikidata.org/wiki/Q1376021","display_name":"Estimation theory","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342922","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W3166608351","https://openalex.org/W3172942063","https://openalex.org/W3175961224","https://openalex.org/W4312246181","https://openalex.org/W4312974690","https://openalex.org/W4383112678","https://openalex.org/W4386076176","https://openalex.org/W4389524050","https://openalex.org/W4390190606","https://openalex.org/W4402727520","https://openalex.org/W4402727819","https://openalex.org/W4402753952","https://openalex.org/W4402777385","https://openalex.org/W4403791756","https://openalex.org/W4404784276","https://openalex.org/W4407689082","https://openalex.org/W4409346443","https://openalex.org/W4412376987","https://openalex.org/W4415798265"],"related_works":[],"abstract_inverted_index":{"Video":[0],"question":[1],"answering":[2],"has":[3],"become":[4],"a":[5,43,67],"cornerstone":[6],"task":[7],"for":[8],"evaluating":[9],"vision":[10],"language":[11],"models.":[12],"However,":[13],"existing":[14,85],"models":[15,40,86],"often":[16],"fail":[17],"to":[18],"ground":[19],"their":[20],"answers":[21],"in":[22],"relevant":[23],"visual":[24],"evidence":[25],"or":[26],"incorrectly":[27],"model":[28],"distributions":[29],"during":[30],"localization.":[31],"To":[32],"address":[33],"this":[34],"limitation,":[35],"we":[36],"propose":[37],"MGQA,":[38],"which":[39],"videos":[41],"as":[42,66],"sequence":[44],"of":[45,102],"discrete":[46],"events":[47],"using":[48],"mixture":[49],"Gaussian":[50,54],"distributions,":[51],"with":[52,87],"each":[53],"characterized":[55],"by":[56],"its":[57],"center,":[58],"range,":[59],"and":[60,71,96],"weight.":[61],"MGQA":[62],"leverages":[63],"question-answering":[64],"accuracy":[65],"weak":[68],"supervision":[69],"signal":[70],"incorporates":[72],"two":[73],"additional":[74],"Gaussian-related":[75],"loss":[76],"functions.":[77],"The":[78],"method":[79],"can":[80],"be":[81],"easily":[82],"integrated":[83],"into":[84],"negligible":[88],"parameter":[89],"overhead.":[90],"Experiments":[91],"conducted":[92],"on":[93],"the":[94,100],"NExT-GQA":[95],"ReX-Time":[97],"datasets":[98],"demonstrate":[99],"effectiveness":[101],"our":[103],"proposed":[104],"method.":[105]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
