{"id":"https://openalex.org/W2963383024","doi":"https://doi.org/10.18653/v1/d16-1044","title":"Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding","display_name":"Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2963383024","doi":"https://doi.org/10.18653/v1/d16-1044","mag":"2963383024"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d16-1044","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d16-1044","pdf_url":"https://www.aclweb.org/anthology/D16-1044.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D16-1044.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052878017","display_name":"Akira Fukui","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Akira Fukui","raw_affiliation_strings":["Sony Corp., Tokyo, Japan","UC Berkeley EECS, CA, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Corp., Tokyo, Japan","institution_ids":[]},{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027383005","display_name":"Dong Huk Park","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Huk Park","raw_affiliation_strings":["UC Berkeley EECS, CA, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084271210","display_name":"Daylen Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daylen Yang","raw_affiliation_strings":["UC Berkeley EECS, CA, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037747070","display_name":"Anna Rohrbach","orcid":"https://orcid.org/0000-0003-1161-6006"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Anna Rohrbach","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbrcken, Germany","UC Berkeley EECS, CA, United States","Max Planck Institute for Informatics, Saarbr\u00fccken, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbrcken, Germany","institution_ids":["https://openalex.org/I4210109712"]},{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]},{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029105520","display_name":"Trevor Darrell","orcid":"https://orcid.org/0000-0001-5453-8533"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Darrell","raw_affiliation_strings":["UC Berkeley EECS, CA, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024481540","display_name":"Marcus Rohrbach","orcid":"https://orcid.org/0000-0001-5908-7751"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcus Rohrbach","raw_affiliation_strings":["UC Berkeley EECS, CA, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley EECS, CA, United States","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5052878017"],"corresponding_institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":72.1383,"has_fulltext":true,"cited_by_count":1399,"citation_normalized_percentile":{"value":0.999259,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"457","last_page":"468"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.7701165080070496},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.668697714805603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6665622591972351},{"id":"https://openalex.org/keywords/bilinear-interpolation","display_name":"Bilinear interpolation","score":0.5713555812835693},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47054487466812134},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4300236999988556},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.23181626200675964},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08843165636062622}],"concepts":[{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.7701165080070496},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.668697714805603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6665622591972351},{"id":"https://openalex.org/C205203396","wikidata":"https://www.wikidata.org/wiki/Q612143","display_name":"Bilinear interpolation","level":2,"score":0.5713555812835693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47054487466812134},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4300236999988556},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.23181626200675964},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08843165636062622},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d16-1044","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d16-1044","pdf_url":"https://www.aclweb.org/anthology/D16-1044.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d16-1044","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d16-1044","pdf_url":"https://www.aclweb.org/anthology/D16-1044.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1261808637","display_name":null,"funder_award_id":"N000141110688","funder_id":"https://openalex.org/F4320333591","funder_display_name":"Multidisciplinary University Research Initiative"},{"id":"https://openalex.org/G2997197412","display_name":null,"funder_award_id":"IIS-1212798","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4311507624","display_name":null,"funder_award_id":"IIS-1427425","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4771600513","display_name":"NRI: Collaborative Research: Shall I Touch This?: Navigating the Look and Feel of Complex Surfaces","funder_award_id":"1427425","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5641105978","display_name":"RI: Large: Collaborative Research: Reconstructive recognition: Uniting statistical scene understanding and physics-based visual reasoning","funder_award_id":"1212798","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6843723984","display_name":null,"funder_award_id":"N000141110688","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320333591","display_name":"Multidisciplinary University Research Initiative","ror":null},{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963383024.pdf","grobid_xml":"https://content.openalex.org/works/W2963383024.grobid-xml"},"referenced_works_count":69,"referenced_works":["https://openalex.org/W7746136","https://openalex.org/W21006490","https://openalex.org/W92662927","https://openalex.org/W114341944","https://openalex.org/W1493892051","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1536680647","https://openalex.org/W1544092585","https://openalex.org/W1686810756","https://openalex.org/W1773149199","https://openalex.org/W1811254738","https://openalex.org/W1836465849","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1931795219","https://openalex.org/W1933349210","https://openalex.org/W2006147162","https://openalex.org/W2088049833","https://openalex.org/W2100235303","https://openalex.org/W2104657103","https://openalex.org/W2108598243","https://openalex.org/W2123024445","https://openalex.org/W2130942839","https://openalex.org/W2131494463","https://openalex.org/W2146897752","https://openalex.org/W2149557440","https://openalex.org/W2155541015","https://openalex.org/W2170653751","https://openalex.org/W2171361956","https://openalex.org/W2171810632","https://openalex.org/W2184188583","https://openalex.org/W2185175083","https://openalex.org/W2190656909","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2251512949","https://openalex.org/W2261271299","https://openalex.org/W2277195237","https://openalex.org/W2293453011","https://openalex.org/W2302548814","https://openalex.org/W2340874616","https://openalex.org/W2391839782","https://openalex.org/W2418349398","https://openalex.org/W2481240925","https://openalex.org/W2563399268","https://openalex.org/W2950178297","https://openalex.org/W2950726992","https://openalex.org/W2950761309","https://openalex.org/W2962706528","https://openalex.org/W2962749469","https://openalex.org/W2962835968","https://openalex.org/W2963066927","https://openalex.org/W2963143606","https://openalex.org/W2963389687","https://openalex.org/W2963398599","https://openalex.org/W2963579811","https://openalex.org/W2963656855","https://openalex.org/W2963735856","https://openalex.org/W2963954913","https://openalex.org/W2964091467","https://openalex.org/W2964118342","https://openalex.org/W2964121744","https://openalex.org/W2964138017","https://openalex.org/W3098232790","https://openalex.org/W4249843299","https://openalex.org/W4294375521","https://openalex.org/W4297813007","https://openalex.org/W4302404635"],"related_works":["https://openalex.org/W2950524887","https://openalex.org/W2883502031","https://openalex.org/W2261271299","https://openalex.org/W4280638452","https://openalex.org/W4285020665","https://openalex.org/W2963066927","https://openalex.org/W3160506688","https://openalex.org/W3111811104","https://openalex.org/W4380083739","https://openalex.org/W2964944724"],"abstract_inverted_index":{"Modeling":[0],"textual":[1,54,73],"or":[2,12,44],"visual":[3,13,26,52,71,109,127],"information":[4],"with":[5,34,152],"vector":[6,32],"representations":[7,33],"trained":[8],"from":[9],"large":[10],"language":[11],"datasets":[14],"has":[15],"been":[16],"successfully":[17],"explored":[18],"in":[19],"recent":[20],"years.":[21],"However,":[22],"tasks":[23],"such":[24],"as":[25,46,48,63,65],"question":[27,110,128,154],"answering":[28,111],"require":[29],"combining":[30],"these":[31,59],"each":[35],"other.":[36],"Approaches":[37],"to":[38,83,96,147],"multimodal":[39,101],"pooling":[40,94],"include":[41],"element-wise":[42],"product":[43,68,78],"sum,":[45],"well":[47],"concatenation":[49],"of":[50,69,120],"the":[51,70,76,108,118,149,153,159,162,166],"and":[53,72,98,112,145,165],"representations.":[55],"We":[56,103,115],"hypothesize":[57],"that":[58],"methods":[60],"are":[61],"not":[62],"expressive":[64],"an":[66,132],"outer":[67,77],"vectors.":[74],"As":[75],"is":[79],"typically":[80],"infeasible":[81],"due":[82],"its":[84],"high":[85],"dimensionality,":[86],"we":[87,130],"instead":[88],"propose":[89],"utilizing":[90],"Multimodal":[91],"Compact":[92],"Bilinear":[93],"(MCB)":[95],"efficiently":[97],"expressively":[99],"combine":[100,148],"features.":[102],"extensively":[104],"evaluate":[105],"MCB":[106,121,136],"on":[107,161],"grounding":[113],"tasks.":[114],"consistently":[116],"show":[117],"benefit":[119],"over":[122,142],"ablations":[123],"without":[124],"MCB.":[125],"For":[126],"answering,":[129],"present":[131],"architecture":[133],"which":[134],"uses":[135],"twice,":[137],"once":[138],"for":[139],"predicting":[140],"attention":[141],"spatial":[143],"features":[144],"again":[146],"attended":[150],"representation":[151],"representation.":[155],"This":[156],"model":[157],"outperforms":[158],"state-of-the-art":[160],"Visual7W":[163],"dataset":[164],"VQA":[167],"challenge.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":20},{"year":2025,"cited_by_count":90},{"year":2024,"cited_by_count":121},{"year":2023,"cited_by_count":156},{"year":2022,"cited_by_count":173},{"year":2021,"cited_by_count":207},{"year":2020,"cited_by_count":205},{"year":2019,"cited_by_count":183},{"year":2018,"cited_by_count":157},{"year":2017,"cited_by_count":81},{"year":2016,"cited_by_count":6}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
