{"id":"https://openalex.org/W4385484644","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191453","title":"Improving Visual Question Answering by Multimodal Gate Fusion Network","display_name":"Improving Visual Question Answering by Multimodal Gate Fusion Network","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385484644","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191453"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191453","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10191453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101230027","display_name":"Shenxiang Xiang","orcid":null},"institutions":[{"id":"https://openalex.org/I1328775524","display_name":"Zhejiang Sci-Tech University","ror":"https://ror.org/03893we55","country_code":"CN","type":"education","lineage":["https://openalex.org/I1328775524"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shenxiang Xiang","raw_affiliation_strings":["School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018","institution_ids":["https://openalex.org/I1328775524"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103114865","display_name":"Qiaohong Chen","orcid":"https://orcid.org/0009-0009-7334-6284"},"institutions":[{"id":"https://openalex.org/I1328775524","display_name":"Zhejiang Sci-Tech University","ror":"https://ror.org/03893we55","country_code":"CN","type":"education","lineage":["https://openalex.org/I1328775524"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaohong Chen","raw_affiliation_strings":["School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018","institution_ids":["https://openalex.org/I1328775524"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060658571","display_name":"Xian Fang","orcid":"https://orcid.org/0000-0001-5161-2574"},"institutions":[{"id":"https://openalex.org/I1328775524","display_name":"Zhejiang Sci-Tech University","ror":"https://ror.org/03893we55","country_code":"CN","type":"education","lineage":["https://openalex.org/I1328775524"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xian Fang","raw_affiliation_strings":["School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018","institution_ids":["https://openalex.org/I1328775524"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101135616","display_name":"Meng-Hao Guo","orcid":"https://orcid.org/0000-0002-4128-4594"},"institutions":[{"id":"https://openalex.org/I1328775524","display_name":"Zhejiang Sci-Tech University","ror":"https://ror.org/03893we55","country_code":"CN","type":"education","lineage":["https://openalex.org/I1328775524"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Menghao Guo","raw_affiliation_strings":["School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Zhejiang Sci-Tech University,Hangzhou,China,310018","institution_ids":["https://openalex.org/I1328775524"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101230027"],"corresponding_institution_ids":["https://openalex.org/I1328775524"],"apc_list":null,"apc_paid":null,"fwci":0.2469,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.51515794,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"28","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8147437572479248},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.7400723695755005},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6572490930557251},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6362612843513489},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.580958366394043},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5199043154716492},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.49615249037742615},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.49003899097442627},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4894050657749176},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4829506278038025},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.444757878780365},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42863112688064575},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38168853521347046},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3281779885292053},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3229105770587921},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.2446584403514862}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8147437572479248},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.7400723695755005},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6572490930557251},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6362612843513489},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.580958366394043},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5199043154716492},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.49615249037742615},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.49003899097442627},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4894050657749176},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4829506278038025},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.444757878780365},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42863112688064575},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38168853521347046},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3281779885292053},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3229105770587921},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2446584403514862},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191453","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10191453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6000000238418579,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W854541894","https://openalex.org/W1575833922","https://openalex.org/W1686810756","https://openalex.org/W1933349210","https://openalex.org/W2053101950","https://openalex.org/W2064675550","https://openalex.org/W2133564696","https://openalex.org/W2157331557","https://openalex.org/W2174492417","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2463565445","https://openalex.org/W2546696630","https://openalex.org/W2553418567","https://openalex.org/W2560730294","https://openalex.org/W2745461083","https://openalex.org/W2747623286","https://openalex.org/W2951527505","https://openalex.org/W2955425717","https://openalex.org/W2963521239","https://openalex.org/W2963954913","https://openalex.org/W2964216663","https://openalex.org/W2966683369","https://openalex.org/W2970231061","https://openalex.org/W2986670728","https://openalex.org/W3092767330","https://openalex.org/W3126792443","https://openalex.org/W3186187670","https://openalex.org/W3199287825","https://openalex.org/W3199977630","https://openalex.org/W4283216282","https://openalex.org/W4292945941","https://openalex.org/W4312821800","https://openalex.org/W4312971868","https://openalex.org/W4313167917","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6623517193","https://openalex.org/W6634232107","https://openalex.org/W6637373629","https://openalex.org/W6640773114","https://openalex.org/W6679434410","https://openalex.org/W6682137061","https://openalex.org/W6685520387","https://openalex.org/W6719057275","https://openalex.org/W6729948091","https://openalex.org/W6739901393","https://openalex.org/W6752083267","https://openalex.org/W6762718338","https://openalex.org/W6789753369","https://openalex.org/W6842542540"],"related_works":["https://openalex.org/W3000097931","https://openalex.org/W2354322770","https://openalex.org/W4237547500","https://openalex.org/W1570848052","https://openalex.org/W2373192430","https://openalex.org/W4239268388","https://openalex.org/W4243305035","https://openalex.org/W1537496349","https://openalex.org/W2379407973","https://openalex.org/W2350267540"],"abstract_inverted_index":{"Visual":[0],"question":[1],"answering":[2,11],"(VQA)":[3],"is":[4,92],"a":[5,17,66,82,154],"difficult":[6],"multimodal":[7,68,83],"task":[8],"that":[9,148],"requires":[10,16],"questions":[12],"about":[13],"images.":[14],"It":[15],"fine-grained":[18],"level":[19],"of":[20,22,27,34,39,75,90,121,140,161,189],"understanding":[21],"both":[23,149],"the":[24,28,31,35,40,61,118,133,138,159,162,173,179,183,192,198],"visual":[25,114,124],"content":[26,33],"image":[29],"and":[30,49,81,96,99,113,123,142,151,195],"textual":[32,112,122],"question.":[36],"However,":[37],"most":[38],"existing":[41],"models":[42],"perform":[43],"weakly":[44],"in":[45,157],"filtering":[46],"noisy":[47],"information":[48],"are":[50],"unable":[51],"to":[52,93,100,136],"fuse":[53,111],"features":[54,115],"from":[55],"multiple":[56],"modalities":[57],"effectively.":[58],"To":[59],"resolve":[60],"above":[62],"restriction,":[63],"we":[64],"propose":[65],"novel":[67],"gate":[69,84],"fusion":[70,85],"network":[71],"(MGFN),":[72],"which":[73],"consists":[74],"an":[76,186],"attention-on-attention":[77],"interaction":[78],"module":[79,86],"(AoAIM)":[80],"(MGFM).":[87],"The":[88,106,144],"role":[89,156],"AoAIM":[91,141,150],"capture":[94],"intra-modal":[95],"inter-modal":[97],"dependencies":[98],"filter":[101],"out":[102],"some":[103],"irrelevant":[104],"attention.":[105],"proposed":[107],"MGFM":[108,152],"can":[109],"effectively":[110],"based":[116],"on":[117,132,178,191,197],"relative":[119],"importance":[120],"modalities.":[125],"We":[126],"have":[127],"performed":[128],"many":[129],"ablation":[130,145],"experiments":[131,146],"VQA-v2":[134,180],"dataset":[135],"validate":[137],"effectiveness":[139],"MGFM.":[143],"demonstrate":[147],"play":[153],"key":[155],"improving":[158],"performance":[160],"model.":[163],"By":[164],"embedding":[165],"these":[166],"two":[167],"modules,":[168],"MGFN":[169,184],"performs":[170],"better":[171],"than":[172],"previous":[174],"state-of-the-art":[175],"(SOTA)":[176],"model":[177],"dataset.":[181],"Particularly,":[182],"achieves":[185],"overall":[187],"accuracy":[188],"71.68%":[190],"test-dev":[193],"set":[194],"72.12%":[196],"test-std":[199],"set.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
