{"id":"https://openalex.org/W4401358581","doi":"https://doi.org/10.1109/tpami.2024.3438887","title":"Transformer Module Networks for Systematic Generalization in Visual Question Answering","display_name":"Transformer Module Networks for Systematic Generalization in Visual Question Answering","publication_year":2024,"publication_date":"2024-08-06","ids":{"openalex":"https://openalex.org/W4401358581","doi":"https://doi.org/10.1109/tpami.2024.3438887","pmid":"https://pubmed.ncbi.nlm.nih.gov/39106140"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3438887","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3438887","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006390702","display_name":"Moyuru Yamada","orcid":"https://orcid.org/0009-0009-1907-7503"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Moyuru Yamada","raw_affiliation_strings":["Fujitsu Limited, Kawasaki, Kanagawa, Japan","Fujitsu Research of India Private Limited, Bangalore, KA, India"],"affiliations":[{"raw_affiliation_string":"Fujitsu Limited, Kawasaki, Kanagawa, Japan","institution_ids":["https://openalex.org/I2252096349"]},{"raw_affiliation_string":"Fujitsu Research of India Private Limited, Bangalore, KA, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015476752","display_name":"Vanessa D\u2019Amario","orcid":"https://orcid.org/0000-0002-4749-6192"},"institutions":[{"id":"https://openalex.org/I203088144","display_name":"Nova Southeastern University","ror":"https://ror.org/042bbge36","country_code":"US","type":"education","lineage":["https://openalex.org/I203088144"]},{"id":"https://openalex.org/I4210094759","display_name":"Fujitsu (United States)","ror":"https://ror.org/0073whr05","country_code":"US","type":"company","lineage":["https://openalex.org/I2252096349","https://openalex.org/I4210094759"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vanessa D\u2019Amario","raw_affiliation_strings":["Fujitsu Research of America, Inc., Sunnyvale, CA, USA","Nova Southeastern University, Fort Lauderdale, FL, USA","Center for Brains, Minds and Machines, Cambridge, MA, USA","Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Fujitsu Research of America, Inc., Sunnyvale, CA, USA","institution_ids":["https://openalex.org/I4210094759"]},{"raw_affiliation_string":"Nova Southeastern University, Fort Lauderdale, FL, USA","institution_ids":["https://openalex.org/I203088144"]},{"raw_affiliation_string":"Center for Brains, Minds and Machines, Cambridge, MA, USA","institution_ids":[]},{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059343040","display_name":"Kentaro Takemoto","orcid":"https://orcid.org/0000-0003-2621-9046"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kentaro Takemoto","raw_affiliation_strings":["Fujitsu Limited, Kawasaki, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Fujitsu Limited, Kawasaki, Kanagawa, Japan","institution_ids":["https://openalex.org/I2252096349"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020830959","display_name":"Xavier Boix","orcid":"https://orcid.org/0000-0003-4656-3485"},"institutions":[{"id":"https://openalex.org/I4210094759","display_name":"Fujitsu (United States)","ror":"https://ror.org/0073whr05","country_code":"US","type":"company","lineage":["https://openalex.org/I2252096349","https://openalex.org/I4210094759"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xavier Boix","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA","Center for Brains, Minds and Machines, Cambridge, MA, USA","Fujitsu Research of America, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Center for Brains, Minds and Machines, Cambridge, MA, USA","institution_ids":[]},{"raw_affiliation_string":"Fujitsu Research of America, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210094759"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042578984","display_name":"Tomotake Sasaki","orcid":"https://orcid.org/0000-0002-3376-2779"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]},{"id":"https://openalex.org/I4210115548","display_name":"Japan Electronics College","ror":"https://ror.org/024hvm870","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210115548"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomotake Sasaki","raw_affiliation_strings":["Fujitsu Limited, Kawasaki, Kanagawa, Japan","Center for Brains, Minds and Machines, Cambridge, MA, USA","Japan Electronics College, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Fujitsu Limited, Kawasaki, Kanagawa, Japan","institution_ids":["https://openalex.org/I2252096349"]},{"raw_affiliation_string":"Center for Brains, Minds and Machines, Cambridge, MA, USA","institution_ids":[]},{"raw_affiliation_string":"Japan Electronics College, Tokyo, Japan","institution_ids":["https://openalex.org/I4210115548"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006390702"],"corresponding_institution_ids":["https://openalex.org/I2252096349"],"apc_list":null,"apc_paid":null,"fwci":0.7502,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.7148309,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"46","issue":"12","first_page":"10096","last_page":"10105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7827597856521606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7325810194015503},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5093613862991333},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5041605234146118},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5000565052032471},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4302118718624115},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1407071352005005},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.0975770354270935},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09204009175300598},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07200029492378235}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7827597856521606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7325810194015503},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5093613862991333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5041605234146118},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5000565052032471},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4302118718624115},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1407071352005005},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0975770354270935},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09204009175300598},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07200029492378235},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3438887","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3438887","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:39106140","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39106140","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311508","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2277195237","https://openalex.org/W2561715562","https://openalex.org/W2745461083","https://openalex.org/W2793546384","https://openalex.org/W2963224792","https://openalex.org/W2963518342","https://openalex.org/W2963644680","https://openalex.org/W2964118342","https://openalex.org/W2970231061","https://openalex.org/W2998356391","https://openalex.org/W3016211260","https://openalex.org/W3090449556","https://openalex.org/W3099849198","https://openalex.org/W3110744880","https://openalex.org/W3118500473","https://openalex.org/W3159619744","https://openalex.org/W3173220247","https://openalex.org/W3174366544","https://openalex.org/W4287995219","https://openalex.org/W4366330503","https://openalex.org/W4388093101","https://openalex.org/W6620707391","https://openalex.org/W6739901393","https://openalex.org/W6748655984","https://openalex.org/W6754944153","https://openalex.org/W6756192261","https://openalex.org/W6757902542","https://openalex.org/W6772118804","https://openalex.org/W6775209659","https://openalex.org/W6783948045","https://openalex.org/W6784333009","https://openalex.org/W6803307422","https://openalex.org/W6803694141","https://openalex.org/W6851592950","https://openalex.org/W6857958404","https://openalex.org/W7066730619"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4381058564","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W3003945460","https://openalex.org/W2964413124"],"abstract_inverted_index":{"Transformers":[0,63,114],"achieve":[1,39,99],"great":[2],"performance":[3,45,103,141],"on":[4,93],"Visual":[5],"Question":[6],"Answering":[7],"(VQA).":[8],"However,":[9],"their":[10],"systematic":[11,43,101],"generalization":[12,44,102],"capabilities,":[13],"i.e.,":[14,30],"handling":[15],"novel":[16,90,116],"combinations":[17],"of":[18,33,62,95,118,139],"known":[19],"concepts,":[20],"is":[21],"unclear.":[22],"We":[23,120],"reveal":[24],"that":[25,35,122],"Neural":[26],"Module":[27,86],"Networks":[28],"(NMNs),":[29],"question-specific":[31],"compositions":[32,94,117],"modules":[34,53],"tackle":[36],"a":[37,89],"sub-task,":[38],"better":[40],"or":[41],"similar":[42],"than":[46,110],"the":[47,125,130,137],"conventional":[48],"Transformers,":[49],"even":[50],"though":[51],"NMNs'":[52],"are":[54,136],"CNN-based.":[55],"In":[56],"order":[57],"to":[58,66,80],"address":[59],"this":[60,69],"shortcoming":[61],"with":[64],"respect":[65],"NMNs,":[67],"in":[68,104],"paper":[70],"we":[71,83],"investigate":[72],"whether":[73],"and":[74],"how":[75],"modularity":[76],"can":[77],"bring":[78],"benefits":[79],"Transformers.":[81],"Namely,":[82],"introduce":[84],"Transformer":[85,96],"Network":[87],"(TMN),":[88],"NMN":[91],"based":[92],"modules.":[97],"TMNs":[98],"state-of-the-art":[100],"three":[105],"VQA":[106],"datasets,":[107],"improving":[108],"more":[109],"30%":[111],"over":[112],"standard":[113],"for":[115,133],"sub-tasks.":[119],"show":[121],"not":[123],"only":[124],"module":[126,131],"composition":[127],"but":[128],"also":[129],"specialization":[132],"each":[134],"sub-task":[135],"key":[138],"such":[140],"gain.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
