{"id":"https://openalex.org/W4407049645","doi":"https://doi.org/10.1145/3711680","title":"Natural Language Understanding and Inference with MLLM in Visual Question Answering: A Survey","display_name":"Natural Language Understanding and Inference with MLLM in Visual Question Answering: A Survey","publication_year":2025,"publication_date":"2025-01-31","ids":{"openalex":"https://openalex.org/W4407049645","doi":"https://doi.org/10.1145/3711680"},"language":"en","primary_location":{"id":"doi:10.1145/3711680","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3711680","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5095084159","display_name":"Jiayi Kuang","orcid":"https://orcid.org/0009-0009-5764-1398"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayi Kuang","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0009-5764-1398","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074799043","display_name":"Ying Shen","orcid":"https://orcid.org/0000-0002-3220-904X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shen","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0000-0002-3220-904X","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089008765","display_name":"Jingyou Xie","orcid":"https://orcid.org/0000-0003-0594-4900"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyou Xie","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0000-0003-0594-4900","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113067512","display_name":"Haohao Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haohao Luo","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0009-0001-9714-0434","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103326189","display_name":"Zhe Xu","orcid":"https://orcid.org/0009-0000-9669-1966"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Xu","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0009-0000-9669-1966","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111899732","display_name":"Ronghao Li","orcid":"https://orcid.org/0009-0009-7174-2638"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ronghao Li","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0009-0009-7174-2638","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100655630","display_name":"Yinghui Li","orcid":"https://orcid.org/0000-0001-7571-6722"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinghui Li","raw_affiliation_strings":["Tsinghua University, Shenzhen China"],"raw_orcid":"https://orcid.org/0000-0001-7571-6722","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112590245","display_name":"Xianfeng Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianfeng Cheng","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0009-0008-8589-9313","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091602832","display_name":"Xika Lin","orcid":"https://orcid.org/0000-0001-6919-7831"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xika Lin","raw_affiliation_strings":["Department of Computer Science, Worcester Polytechnic Institute, Worcester, United States"],"raw_orcid":"https://orcid.org/0000-0001-6919-7831","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Worcester Polytechnic Institute, Worcester, United States","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111940051","display_name":"Yu Han","orcid":"https://orcid.org/0000-0002-7550-1737"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Han","raw_affiliation_strings":["Sun Yat-Sen University, Shenzhen China"],"raw_orcid":"https://orcid.org/0000-0002-7550-1737","affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University, Shenzhen China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":34.6009,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.99829296,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"57","issue":"8","first_page":"1","last_page":"36"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.9122098684310913},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8840633630752563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6328356266021729},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.6064023971557617},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6038722991943359},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5704958438873291},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5671629905700684},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5371938347816467},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4846065938472748},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.47663578391075134},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.4375322163105011}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.9122098684310913},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8840633630752563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6328356266021729},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.6064023971557617},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6038722991943359},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5704958438873291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5671629905700684},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5371938347816467},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4846065938472748},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.47663578391075134},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.4375322163105011},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3711680","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3711680","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":171,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W125693051","https://openalex.org/W627986001","https://openalex.org/W639708223","https://openalex.org/W1522734439","https://openalex.org/W1575833922","https://openalex.org/W1593271688","https://openalex.org/W1686810756","https://openalex.org/W1933349210","https://openalex.org/W2016089260","https://openalex.org/W2016753842","https://openalex.org/W2097117768","https://openalex.org/W2127795553","https://openalex.org/W2151498684","https://openalex.org/W2153579005","https://openalex.org/W2189070436","https://openalex.org/W2194775991","https://openalex.org/W2255466643","https://openalex.org/W2277195237","https://openalex.org/W2396147015","https://openalex.org/W2463565445","https://openalex.org/W2529436507","https://openalex.org/W2546696630","https://openalex.org/W2553418567","https://openalex.org/W2560730294","https://openalex.org/W2561715562","https://openalex.org/W2600463316","https://openalex.org/W2601530120","https://openalex.org/W2613718673","https://openalex.org/W2745461083","https://openalex.org/W2751804245","https://openalex.org/W2775221064","https://openalex.org/W2808877322","https://openalex.org/W2889792105","https://openalex.org/W2892259963","https://openalex.org/W2921873695","https://openalex.org/W2947312908","https://openalex.org/W2962749469","https://openalex.org/W2962779575","https://openalex.org/W2963082899","https://openalex.org/W2963150162","https://openalex.org/W2963176022","https://openalex.org/W2963191264","https://openalex.org/W2963383024","https://openalex.org/W2963398599","https://openalex.org/W2963518342","https://openalex.org/W2963622213","https://openalex.org/W2963644680","https://openalex.org/W2963758027","https://openalex.org/W2963954913","https://openalex.org/W2964138017","https://openalex.org/W2964138343","https://openalex.org/W2964303913","https://openalex.org/W2965628639","https://openalex.org/W2965818302","https://openalex.org/W2966715458","https://openalex.org/W2969862959","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2990138404","https://openalex.org/W2998356391","https://openalex.org/W2998631105","https://openalex.org/W3004349648","https://openalex.org/W3014611590","https://openalex.org/W3034727271","https://openalex.org/W3034787499","https://openalex.org/W3035497460","https://openalex.org/W3035688398","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3093200502","https://openalex.org/W3099700870","https://openalex.org/W3101703188","https://openalex.org/W3109072389","https://openalex.org/W3113204237","https://openalex.org/W3114427170","https://openalex.org/W3115476810","https://openalex.org/W3118641406","https://openalex.org/W3120043490","https://openalex.org/W3120981978","https://openalex.org/W3134930138","https://openalex.org/W3138516171","https://openalex.org/W3155855665","https://openalex.org/W3156470785","https://openalex.org/W3161051151","https://openalex.org/W3173909648","https://openalex.org/W3177174258","https://openalex.org/W3177224328","https://openalex.org/W3184735396","https://openalex.org/W3184784418","https://openalex.org/W3196122027","https://openalex.org/W3199693760","https://openalex.org/W3201068857","https://openalex.org/W3212610063","https://openalex.org/W3212972574","https://openalex.org/W3216130706","https://openalex.org/W4214824212","https://openalex.org/W4221155360","https://openalex.org/W4224440661","https://openalex.org/W4224919571","https://openalex.org/W4225323055","https://openalex.org/W4225533634","https://openalex.org/W4226182655","https://openalex.org/W4226271114","https://openalex.org/W4236965008","https://openalex.org/W4254451051","https://openalex.org/W4281557623","https://openalex.org/W4287113019","https://openalex.org/W4287545460","https://openalex.org/W4294170691","https://openalex.org/W4298392976","https://openalex.org/W4299522971","https://openalex.org/W4312205996","https://openalex.org/W4312784228","https://openalex.org/W4319165821","https://openalex.org/W4323717348","https://openalex.org/W4353113046","https://openalex.org/W4361866031","https://openalex.org/W4365800629","https://openalex.org/W4366330503","https://openalex.org/W4366566341","https://openalex.org/W4366850747","https://openalex.org/W4367018081","https://openalex.org/W4368755685","https://openalex.org/W4377164404","https://openalex.org/W4378473834","https://openalex.org/W4378510496","https://openalex.org/W4378768739","https://openalex.org/W4378942772","https://openalex.org/W4382491206","https://openalex.org/W4385245566","https://openalex.org/W4385574213","https://openalex.org/W4386065596","https://openalex.org/W4386076140","https://openalex.org/W4386794522","https://openalex.org/W4387323789","https://openalex.org/W4387800098","https://openalex.org/W4387968448","https://openalex.org/W4388555312","https://openalex.org/W4388891098","https://openalex.org/W4389073289","https://openalex.org/W4389217430","https://openalex.org/W4389520252","https://openalex.org/W4390041933","https://openalex.org/W4390091503","https://openalex.org/W4391158820","https://openalex.org/W4391293808","https://openalex.org/W4391853818","https://openalex.org/W4393119387","https://openalex.org/W4393782427","https://openalex.org/W4395687449","https://openalex.org/W4395687490","https://openalex.org/W4396912917","https://openalex.org/W4399058921","https://openalex.org/W4399554763","https://openalex.org/W4400181815","https://openalex.org/W4400479973","https://openalex.org/W4401024016","https://openalex.org/W4402402086","https://openalex.org/W4402671595","https://openalex.org/W4402713111","https://openalex.org/W4403808928","https://openalex.org/W4405426003","https://openalex.org/W6634232107","https://openalex.org/W6739901393","https://openalex.org/W6754553780","https://openalex.org/W6839041712","https://openalex.org/W6851592950","https://openalex.org/W6860041859","https://openalex.org/W6870672000","https://openalex.org/W6885071273"],"related_works":["https://openalex.org/W3157284875","https://openalex.org/W2147241511","https://openalex.org/W2259406085","https://openalex.org/W1984061923","https://openalex.org/W2099715052","https://openalex.org/W4226247999","https://openalex.org/W3090872036","https://openalex.org/W3209772662","https://openalex.org/W2152164004","https://openalex.org/W4200629926"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"is":[4,35],"a":[5,20,46],"challenge":[6],"task":[7,23],"that":[8],"combines":[9],"natural":[10,63],"language":[11,27,64,106],"processing":[12],"and":[13,17,45,68,95,103,128,141,145],"computer":[14],"vision":[15],"techniques":[16],"gradually":[18],"becomes":[19],"benchmark":[21],"test":[22],"in":[24,93,108,119],"multimodal":[25,104],"large":[26,105],"models":[28,52,102,107],"(MLLMs).":[29],"The":[30],"goal":[31],"of":[32,40,43,49,62,66,116,125,131,139],"our":[33],"survey":[34,57],"to":[36],"provide":[37],"an":[38,59],"overview":[39],"the":[41,50,73,82,114,123,129,137],"development":[42],"VQA":[44,84,120,140],"detailed":[47],"description":[48],"latest":[51],"with":[53,99],"high":[54],"timeliness.":[55],"This":[56],"gives":[58],"up-to-date":[60],"synthesis":[61],"understanding":[65],"images":[67],"text,":[69],"as":[70,72],"well":[71],"knowledge":[74,117,127],"reasoning":[75,118],"module":[76],"based":[77],"on":[78,81,90],"image-question":[79],"information":[80,98],"core":[83],"tasks.":[85],"In":[86],"addition,":[87],"we":[88,135],"elaborate":[89],"recent":[91],"advances":[92],"extracting":[94],"fusing":[96],"modal":[97],"vision-language":[100],"pretraining":[101],"VQA.":[109],"We":[110],"also":[111],"exhaustively":[112],"review":[113],"progress":[115],"by":[121],"detailing":[122],"extraction":[124],"internal":[126],"introduction":[130],"external":[132],"knowledge.":[133],"Finally,":[134],"present":[136],"datasets":[138],"different":[142],"evaluation":[143],"metrics":[144],"discuss":[146],"possible":[147],"directions":[148],"for":[149],"future":[150],"work.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":16},{"year":2025,"cited_by_count":22}],"updated_date":"2026-06-16T09:24:06.705377","created_date":"2025-10-10T00:00:00"}
