{"id":"https://openalex.org/W4387717642","doi":"https://doi.org/10.1109/tsmc.2023.3319964","title":"So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering","display_name":"So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering","publication_year":2023,"publication_date":"2023-10-17","ids":{"openalex":"https://openalex.org/W4387717642","doi":"https://doi.org/10.1109/tsmc.2023.3319964"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2023.3319964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2023.3319964","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088111638","display_name":"Wenbo Zheng","orcid":"https://orcid.org/0000-0001-9732-3217"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenbo Zheng","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100306850","display_name":"Lan Yan","orcid":"https://orcid.org/0000-0001-6452-9649"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lan Yan","raw_affiliation_strings":["College of Computer Science and Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113600509","display_name":"Fei\u2010Yue Wang","orcid":"https://orcid.org/0000-0001-9185-3989"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei-Yue Wang","raw_affiliation_strings":["State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088111638"],"corresponding_institution_ids":["https://openalex.org/I196699116"],"apc_list":null,"apc_paid":null,"fwci":0.3571,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60364334,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"54","issue":"2","first_page":"854","last_page":"865"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7232149839401245},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6642406582832336},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5755113363265991},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.54995197057724},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5437837839126587},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5217581391334534},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4725964665412903},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41987931728363037},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3943779468536377},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2165786325931549}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7232149839401245},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6642406582832336},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5755113363265991},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.54995197057724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5437837839126587},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5217581391334534},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4725964665412903},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41987931728363037},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3943779468536377},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2165786325931549},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2023.3319964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2023.3319964","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8600000143051147,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1349796033","display_name":null,"funder_award_id":"2018AAA0101502","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G1431632191","display_name":null,"funder_award_id":"BX20230114","funder_id":"https://openalex.org/F4320335768","funder_display_name":"National Postdoctoral Program for Innovative Talents"},{"id":"https://openalex.org/G1781927017","display_name":null,"funder_award_id":"62303361","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3605390186","display_name":null,"funder_award_id":"623QN266","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"},{"id":"https://openalex.org/G4192612216","display_name":null,"funder_award_id":"WUT: 233110002","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G6929903567","display_name":null,"funder_award_id":"62302161","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322866","display_name":"Natural Science Foundation of Hainan Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335768","display_name":"National Postdoctoral Program for Innovative Talents","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":110,"referenced_works":["https://openalex.org/W2008806374","https://openalex.org/W2012689760","https://openalex.org/W2053317383","https://openalex.org/W2090048052","https://openalex.org/W2108598243","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2493916176","https://openalex.org/W2536981762","https://openalex.org/W2562512475","https://openalex.org/W2809273748","https://openalex.org/W2896457183","https://openalex.org/W2946794439","https://openalex.org/W2955259376","https://openalex.org/W2963091558","https://openalex.org/W2963206679","https://openalex.org/W2963319519","https://openalex.org/W2964051675","https://openalex.org/W2965373594","https://openalex.org/W2965512000","https://openalex.org/W2969266941","https://openalex.org/W2979382951","https://openalex.org/W2981899103","https://openalex.org/W2988326850","https://openalex.org/W2992463140","https://openalex.org/W3004082545","https://openalex.org/W3004268082","https://openalex.org/W3005769002","https://openalex.org/W3029273706","https://openalex.org/W3034205192","https://openalex.org/W3034291519","https://openalex.org/W3034336960","https://openalex.org/W3034502973","https://openalex.org/W3034599802","https://openalex.org/W3034726550","https://openalex.org/W3034943799","https://openalex.org/W3035517717","https://openalex.org/W3043311956","https://openalex.org/W3077892130","https://openalex.org/W3087338569","https://openalex.org/W3092721056","https://openalex.org/W3093006710","https://openalex.org/W3093385053","https://openalex.org/W3094071022","https://openalex.org/W3095309002","https://openalex.org/W3100786684","https://openalex.org/W3101703188","https://openalex.org/W3102039465","https://openalex.org/W3104645229","https://openalex.org/W3104893896","https://openalex.org/W3104929588","https://openalex.org/W3106728613","https://openalex.org/W3107634219","https://openalex.org/W3108319047","https://openalex.org/W3110661548","https://openalex.org/W3115287481","https://openalex.org/W3120043490","https://openalex.org/W3123089036","https://openalex.org/W3168834895","https://openalex.org/W3169978599","https://openalex.org/W3173038784","https://openalex.org/W3173909648","https://openalex.org/W3174105824","https://openalex.org/W3174851253","https://openalex.org/W3176851559","https://openalex.org/W3177224328","https://openalex.org/W3181159501","https://openalex.org/W3193171560","https://openalex.org/W3197857628","https://openalex.org/W3198196812","https://openalex.org/W3201422581","https://openalex.org/W3202839357","https://openalex.org/W3205050305","https://openalex.org/W3205209899","https://openalex.org/W3207847779","https://openalex.org/W3213831041","https://openalex.org/W3215381707","https://openalex.org/W3215633354","https://openalex.org/W3216857888","https://openalex.org/W4220698980","https://openalex.org/W4221165960","https://openalex.org/W4283069548","https://openalex.org/W4290713716","https://openalex.org/W4293518934","https://openalex.org/W4293523262","https://openalex.org/W4312263373","https://openalex.org/W4312440774","https://openalex.org/W4317677707","https://openalex.org/W4317796230","https://openalex.org/W4320458302","https://openalex.org/W4322747033","https://openalex.org/W4378805076","https://openalex.org/W4379382378","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6637178625","https://openalex.org/W6679436768","https://openalex.org/W6685350579","https://openalex.org/W6691603626","https://openalex.org/W6726873649","https://openalex.org/W6739901393","https://openalex.org/W6745740328","https://openalex.org/W6755002340","https://openalex.org/W6767362881","https://openalex.org/W6779235070","https://openalex.org/W6781663043","https://openalex.org/W6796990549","https://openalex.org/W6803953951","https://openalex.org/W6809008970","https://openalex.org/W6850204008"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2185469136","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3159777597","https://openalex.org/W2011264131"],"abstract_inverted_index":{"While":[0],"texts":[1,25],"related":[2],"to":[3,54,108],"images":[4],"convey":[5],"fundamental":[6],"messages":[7],"for":[8],"scene":[9,58],"understanding":[10],"and":[11,51,61,148],"reasoning,":[12],"text-based":[13,70],"visual":[14,20,71],"question":[15,72],"answering":[16,73],"tasks":[17],"concentrate":[18],"on":[19],"questions":[21],"that":[22,35,127,160],"require":[23],"reading":[24],"from":[26,39,88],"images.":[27],"However,":[28],"most":[29],"current":[30],"methods":[31],"add":[32],"multimodal":[33,75],"features":[34],"are":[36,132,146],"independently":[37],"extracted":[38],"a":[40,44,68,115],"given":[41],"image":[42],"into":[43],"reasoning":[45],"model":[46,79],"without":[47],"considering":[48],"their":[49,154],"inter-":[50],"intra-relationships":[52],"according":[53],"three":[55],"modalities":[56,90],"(i.e.,":[57],"texts,":[59],"questions,":[60],"images).":[62],"To":[63],"this":[64,135],"end,":[65],"we":[66,95,149],"propose":[67],"novel":[69],"model,":[74],"graph":[76,97,103,138],"reasoning.":[77],"Our":[78],"first":[80],"extracts":[81],"intramodality":[82],"relationships":[83],"by":[84],"taking":[85],"the":[86,110,125],"representations":[87,145],"identical":[89],"as":[91,128],"semantic":[92,130],"graphs.":[93],"Then,":[94],"present":[96],"multihead":[98],"self-attention,":[99],"which":[100],"boosts":[101],"each":[102,137],"representation":[104,139],"through":[105],"graph-by-graph":[106],"aggregation":[107],"capture":[109],"intermodality":[111],"relationship.":[112],"It":[113],"is":[114],"case":[116],"of":[117],"\u201cso":[118],"many":[119,122],"heads,":[120],"so":[121],"wits\u201d":[123],"in":[124,134],"sense":[126],"more":[129,141],"graphs":[131],"involved":[133],"process,":[136],"becomes":[140],"effective.":[142],"Finally,":[143],"these":[144],"reprojected,":[147],"perform":[150],"answer":[151],"prediction":[152],"with":[153,168],"outputs.":[155],"The":[156],"experimental":[157],"results":[158],"demonstrate":[159],"our":[161],"approach":[162],"realizes":[163],"substantially":[164],"better":[165],"performance":[166],"compared":[167],"other":[169],"state-of-the-art":[170],"models.":[171]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
