{"id":"https://openalex.org/W4403578283","doi":"https://doi.org/10.1145/3627673.3679880","title":"Beyond Language Bias: Overcoming Multimodal Shortcut and Distribution Biases for Robust Visual Question Answering","display_name":"Beyond Language Bias: Overcoming Multimodal Shortcut and Distribution Biases for Robust Visual Question Answering","publication_year":2024,"publication_date":"2024-10-20","ids":{"openalex":"https://openalex.org/W4403578283","doi":"https://doi.org/10.1145/3627673.3679880"},"language":"en","primary_location":{"id":"doi:10.1145/3627673.3679880","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627673.3679880","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104013015","display_name":"Jingliang Gu","orcid":"https://orcid.org/0009-0007-5968-5146"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingliang Gu","raw_affiliation_strings":["Key Lab of Education Blockchain and Intelligent Technology, Ministry of Education, Guangxi Normal University &amp; Guangxi Key Lab of Multi-source Information Mining and Security, Guangxi Normal University, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of Education Blockchain and Intelligent Technology, Ministry of Education, Guangxi Normal University &amp; Guangxi Key Lab of Multi-source Information Mining and Security, Guangxi Normal University, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I29739308"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100701695","display_name":"Zhixin Li","orcid":"https://orcid.org/0000-0002-5313-6134"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixin Li","raw_affiliation_strings":["Key Lab of Education Blockchain and Intelligent Technology, Ministry of Education, Guangxi Normal University &amp; Guangxi Key Lab of Multi-source Information Mining and Security, Guangxi Normal University, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of Education Blockchain and Intelligent Technology, Ministry of Education, Guangxi Normal University &amp; Guangxi Key Lab of Multi-source Information Mining and Security, Guangxi Normal University, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I29739308"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5104013015"],"corresponding_institution_ids":["https://openalex.org/I29739308"],"apc_list":null,"apc_paid":null,"fwci":0.5248,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66792453,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3767","last_page":"3771"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7694022059440613},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7608058452606201},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48568663001060486},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39535027742385864}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7694022059440613},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7608058452606201},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48568663001060486},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39535027742385864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3627673.3679880","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627673.3679880","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5899999737739563,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1602136775","https://openalex.org/W1933349210","https://openalex.org/W2745461083","https://openalex.org/W2784163702","https://openalex.org/W2963466847","https://openalex.org/W2963644680","https://openalex.org/W2963954913","https://openalex.org/W2964138343","https://openalex.org/W2986763679","https://openalex.org/W3035517717","https://openalex.org/W3103152812","https://openalex.org/W3177934633","https://openalex.org/W3195129957","https://openalex.org/W3202778561","https://openalex.org/W3213504576","https://openalex.org/W4214917601","https://openalex.org/W4386065611","https://openalex.org/W4386066382","https://openalex.org/W4393033612"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Recent":[0],"studies":[1,41],"have":[2,45,54],"found":[3],"that":[4,189],"many":[5],"VQA":[6,29,51,69],"models":[7],"are":[8],"influenced":[9],"by":[10,71],"biases,":[11,82,95],"preventing":[12],"them":[13],"from":[14,127],"effectively":[15],"using":[16,104,201],"multimodal":[17],"information":[18],"for":[19],"reasoning.":[20],"Consequently,":[21],"these":[22],"methods,":[23],"which":[24],"perform":[25],"well":[26],"on":[27,34,47,140,196,209],"standard":[28],"datasets,":[30],"exhibit":[31],"underwhelming":[32],"performance":[33,195],"the":[35,43,64,73,128,153,158,162,167,171,174,179,197,210],"bias-sensitive":[36],"VQA-CP":[37],"dataset.":[38,212],"Although":[39],"numerous":[40],"in":[42,50,68,157],"past":[44],"focused":[46],"mitigating":[48],"biases":[49,103],"models,":[52],"most":[53],"only":[55],"considered":[56],"language":[57,94],"bias.":[58,77],"In":[59,161],"this":[60],"paper,":[61],"we":[62,83,97,116,165],"address":[63],"issue":[65],"of":[66,76,89,173,182],"bias":[67,86],"task":[70],"targeting":[72],"various":[74],"sources":[75],"Specifically,":[78],"to":[79,101,121,151,177],"counteract":[80],"shortcut":[81],"integrate":[84],"a":[85,105,118,123,147],"detector":[87],"capable":[88],"capturing":[90],"both":[91],"vision":[92],"and":[93,96,109,143,205],"reinforce":[98],"its":[99],"ability":[100,156],"capture":[102],"generative":[106],"adversarial":[107],"network":[108],"knowledge":[110],"distillation.":[111],"To":[112],"combat":[113],"distribution":[114],"bias,":[115],"use":[117],"cosine":[119,124,168],"classifier":[120],"obtain":[122,178],"feature":[125,159],"branch":[126],"base":[129,175],"model,":[130],"training":[131],"it":[132],"with":[133,146,170],"an":[134],"adaptive":[135],"angular":[136],"margin":[137],"loss":[138,150],"based":[139],"answer":[141],"frequency":[142],"difficulty,":[144],"along":[145],"supervised":[148],"contrastive":[149],"enhance":[152],"model's":[154],"classification":[155],"space.":[160],"prediction":[163,172,181],"stage,":[164],"fuse":[166],"features":[169],"model":[176],"final":[180],"our":[183,190],"model.":[184],"Finally,":[185],"extensive":[186],"experiments":[187],"demonstrate":[188],"approach":[191],"SD-VQA":[192],"achieves":[193,206],"state-of-the-art":[194],"VQA-CPv2":[198],"dataset":[199],"without":[200],"any":[202],"data":[203],"balancing,":[204],"competitive":[207],"results":[208],"VQAv2":[211]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
