{"id":"https://openalex.org/W4392903117","doi":"https://doi.org/10.1109/icassp48485.2024.10447690","title":"Modality Re-Balance for Visual Question Answering: A Causal Framework","display_name":"Modality Re-Balance for Visual Question Answering: A Causal Framework","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903117","doi":"https://doi.org/10.1109/icassp48485.2024.10447690"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447690","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112858603","display_name":"Xinpeng Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinpeng Lv","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020177243","display_name":"Wanrong Huang","orcid":"https://orcid.org/0000-0001-5778-9055"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanrong Huang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114889392","display_name":"Haotian Wang","orcid":"https://orcid.org/0009-0001-5363-3886"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haotian Wang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102723856","display_name":"Ruochun Jin","orcid":"https://orcid.org/0000-0001-6217-4223"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruochun Jin","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028578817","display_name":"Xueqiong Li","orcid":"https://orcid.org/0000-0002-2364-4947"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueqiong Li","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088132719","display_name":"Zhipeng Lin","orcid":"https://orcid.org/0000-0001-5941-2163"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhipeng Lin","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101518994","display_name":"Shuman Li","orcid":"https://orcid.org/0000-0002-4248-7836"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuman Li","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051672922","display_name":"Yongquan Feng","orcid":"https://orcid.org/0009-0007-4176-4516"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongquan Feng","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065371644","display_name":"Yuhua Tang","orcid":"https://orcid.org/0000-0002-4956-3379"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhua Tang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology","College of Computer Science and Technology, National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5112858603"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.5263,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.61459605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"5650","last_page":"5654"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7684959173202515},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7286547422409058},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.712140679359436},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.6957910060882568},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6300840377807617},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6030797362327576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5458077788352966},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48613426089286804},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.46526291966438293},{"id":"https://openalex.org/keywords/balance","display_name":"Balance (ability)","score":0.46351540088653564},{"id":"https://openalex.org/keywords/causal-reasoning","display_name":"Causal reasoning","score":0.4189065396785736},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3610413074493408},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.35607266426086426},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17395704984664917},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.10907471179962158},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.07748839259147644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7684959173202515},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7286547422409058},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.712140679359436},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.6957910060882568},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6300840377807617},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6030797362327576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5458077788352966},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48613426089286804},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.46526291966438293},{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.46351540088653564},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.4189065396785736},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3610413074493408},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.35607266426086426},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17395704984664917},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.10907471179962158},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.07748839259147644},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447690","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W103844816","https://openalex.org/W1821462560","https://openalex.org/W2479423890","https://openalex.org/W2560730294","https://openalex.org/W2745461083","https://openalex.org/W2962681511","https://openalex.org/W2963644680","https://openalex.org/W2970019270","https://openalex.org/W3034287395","https://openalex.org/W3035517717","https://openalex.org/W3099884329","https://openalex.org/W3104788521","https://openalex.org/W3110575265","https://openalex.org/W3154781046","https://openalex.org/W3175344799","https://openalex.org/W3177934633","https://openalex.org/W4212774754","https://openalex.org/W4304730807","https://openalex.org/W6638523607","https://openalex.org/W6764756247","https://openalex.org/W6777578573","https://openalex.org/W6845777422"],"related_works":["https://openalex.org/W3215034539","https://openalex.org/W4403292511","https://openalex.org/W4313422683","https://openalex.org/W4282978140","https://openalex.org/W3184771105","https://openalex.org/W2894915327","https://openalex.org/W2161504683","https://openalex.org/W4361865420","https://openalex.org/W2574301230","https://openalex.org/W3160160539"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"models":[4],"often":[5,38],"prioritize":[6],"language":[7,27,51],"cues":[8],"over":[9],"visual":[10,71],"knowledge,":[11],"leading":[12],"to":[13,25,40,46,90,100],"the":[14,47,63],"\"language":[15],"prior\"":[16],"phenomenon.":[17],"To":[18],"address":[19],"this,":[20],"researchers":[21],"have":[22],"proposed":[23],"methods":[24],"balance":[26],"and":[28,33,72,109],"image":[29],"information":[30,74],"during":[31],"training":[32],"inference.":[34],"However,":[35],"these":[36],"approaches":[37],"struggle":[39],"capture":[41],"important":[42],"linguistic":[43],"components":[44],"due":[45],"excessive":[48],"exclusion":[49],"of":[50,85],"information.":[52],"Inspired":[53],"by":[54],"causal":[55],"inference,":[56],"we":[57],"introduce":[58],"a":[59],"novel":[60],"approach":[61],"called":[62],"SyMmetrically":[64],"Balanced":[65],"Causal":[66],"framework":[67,79],"(SMBC)":[68],"that":[69,96],"rebalances":[70],"textual":[73],"in":[75],"VQA":[76,102],"tasks.":[77],"This":[78],"allows":[80],"for":[81],"an":[82],"equal":[83],"contribution":[84],"knowledge":[86],"from":[87],"both":[88],"modalities":[89],"inference":[91],"results.":[92],"Experimental":[93],"evaluation":[94],"shows":[95],"SMBC:":[97],"1)":[98],"applies":[99],"prevalent":[101],"models,":[103],"including":[104],"those":[105],"with":[106],"data":[107],"augmentation,":[108],"2)":[110],"consistently":[111],"improves":[112],"performance":[113],"on":[114],"established":[115],"benchmarks.":[116]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
