{"id":"https://openalex.org/W4390479117","doi":"https://doi.org/10.1145/3595916.3626381","title":"Adaptive Fusion for Visual Question Answering: Integrating Multi-Label Classification and Similarity Matching","display_name":"Adaptive Fusion for Visual Question Answering: Integrating Multi-Label Classification and Similarity Matching","publication_year":2023,"publication_date":"2023-12-06","ids":{"openalex":"https://openalex.org/W4390479117","doi":"https://doi.org/10.1145/3595916.3626381"},"language":"en","primary_location":{"id":"doi:10.1145/3595916.3626381","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626381","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626381","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626381","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074153734","display_name":"Zhengtao Yu","orcid":"https://orcid.org/0009-0009-4753-3529"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhengtao Yu","raw_affiliation_strings":["Fuyang Normal University, China"],"raw_orcid":"https://orcid.org/0009-0009-4753-3529","affiliations":[{"raw_affiliation_string":"Fuyang Normal University, China","institution_ids":["https://openalex.org/I13175533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047727424","display_name":"Jia Zhao","orcid":"https://orcid.org/0000-0002-7440-0109"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Zhao","raw_affiliation_strings":["Fuyang Normal University, China"],"raw_orcid":"https://orcid.org/0000-0002-7440-0109","affiliations":[{"raw_affiliation_string":"Fuyang Normal University, China","institution_ids":["https://openalex.org/I13175533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079680866","display_name":"Huiling Wang","orcid":"https://orcid.org/0009-0006-2962-0619"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huiling Wang","raw_affiliation_strings":["Fuyang normal university, China"],"raw_orcid":"https://orcid.org/0009-0006-2962-0619","affiliations":[{"raw_affiliation_string":"Fuyang normal university, China","institution_ids":["https://openalex.org/I13175533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079607447","display_name":"Chenliang Guo","orcid":"https://orcid.org/0009-0001-7417-2265"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenliang Guo","raw_affiliation_strings":["Fuyang Normal University, China"],"raw_orcid":"https://orcid.org/0009-0001-7417-2265","affiliations":[{"raw_affiliation_string":"Fuyang Normal University, China","institution_ids":["https://openalex.org/I13175533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115603338","display_name":"Tong Zhou","orcid":"https://orcid.org/0009-0009-9000-8228"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Zhou","raw_affiliation_strings":["Fuyang Normal University, China"],"raw_orcid":"https://orcid.org/0009-0009-9000-8228","affiliations":[{"raw_affiliation_string":"Fuyang Normal University, China","institution_ids":["https://openalex.org/I13175533"]}]},{"author_position":"last","author":{"id":null,"display_name":"Chongxiang Sun","orcid":"https://orcid.org/0009-0006-1651-1229"},"institutions":[{"id":"https://openalex.org/I13175533","display_name":"Fuyang Normal University","ror":"https://ror.org/02njz9p87","country_code":"CN","type":"education","lineage":["https://openalex.org/I13175533"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chongxiang Sun","raw_affiliation_strings":["Fuyang Normal University, China"],"raw_orcid":"https://orcid.org/0009-0006-1651-1229","affiliations":[{"raw_affiliation_string":"Fuyang Normal University, China","institution_ids":["https://openalex.org/I13175533"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074153734"],"corresponding_institution_ids":["https://openalex.org/I13175533"],"apc_list":null,"apc_paid":null,"fwci":0.1136,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.44514691,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8671888113021851},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.743291974067688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.639003336429596},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5996898412704468},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5815790295600891},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5498679280281067},{"id":"https://openalex.org/keywords/multi-label-classification","display_name":"Multi-label classification","score":0.5072562098503113},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4663386046886444},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44875842332839966},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3436671197414398},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09935212135314941},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07401201128959656}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8671888113021851},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.743291974067688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.639003336429596},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5996898412704468},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5815790295600891},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5498679280281067},{"id":"https://openalex.org/C2776482837","wikidata":"https://www.wikidata.org/wiki/Q3553958","display_name":"Multi-label classification","level":2,"score":0.5072562098503113},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4663386046886444},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44875842332839966},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3436671197414398},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09935212135314941},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07401201128959656},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3595916.3626381","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626381","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626381","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3595916.3626381","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626381","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626381","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4227544573","display_name":null,"funder_award_id":"61906044","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G679136769","display_name":null,"funder_award_id":"2020M681984","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390479117.pdf","grobid_xml":"https://content.openalex.org/works/W4390479117.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2321588627","https://openalex.org/W2560730294","https://openalex.org/W2798895485","https://openalex.org/W2963644680","https://openalex.org/W3095963973","https://openalex.org/W3128560592","https://openalex.org/W3177934633","https://openalex.org/W3204924011","https://openalex.org/W4200475325","https://openalex.org/W4210484952","https://openalex.org/W4210940616","https://openalex.org/W4220717373","https://openalex.org/W4283258232","https://openalex.org/W4285815152","https://openalex.org/W4288055272","https://openalex.org/W4294344554","https://openalex.org/W4304207335","https://openalex.org/W4308236280","https://openalex.org/W4321276240"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3159777597","https://openalex.org/W4212839359"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"is":[4,33,80],"an":[5],"important":[6],"multimodal":[7],"task":[8],"in":[9,54,109,181],"which":[10,32,203],"models":[11,25,40],"are":[12],"required":[13],"to":[14,42,46,61,83,97,111,124,137,150,176],"answer":[15,47,171,189],"questions":[16,48],"based":[17],"on":[18,210],"visual":[19,23],"cues.":[20],"However,":[21,67],"most":[22],"question-answering":[24],"suffer":[26],"from":[27],"the":[28,51,55,63,73,99,112,117,122,125,135,139,152,156,164,178,183,192,198],"language":[29,64,179],"prior":[30,65],"problem,":[31],"caused":[34],"by":[35],"data":[36],"bias.":[37],"Specifically,":[38,108],"VQA":[39],"tend":[41],"output":[43,154,166,175],"high-frequency":[44],"answers":[45,123],"while":[49],"ignoring":[50],"information":[52,96],"contained":[53],"images.":[56],"Many":[57],"approaches":[58,69],"have":[59],"emerged":[60],"solve":[62,84],"problem.":[66],"previous":[68],"could":[70],"only":[71],"improve":[72],"performance":[74,207],"of":[75,148,200,208],"easy":[76],"classes,":[77],"and":[78,103,121,129,160],"there":[79],"no":[81],"means":[82],"hard":[85,106],"classes":[86],"effectively.":[87],"In":[88],"this":[89],"paper,":[90],"we":[91,115,144,162,187],"will":[92],"utilize":[93],"more":[94],"semantic":[95],"guide":[98],"model":[100],"for":[101,167,173,191],"learning":[102],"better":[104],"handle":[105],"questions.":[107],"addition":[110],"classification":[113,153,193],"task,":[114],"map":[116],"image":[118],"question":[119],"pairs":[120],"same":[126],"dimensional":[127],"space":[128],"construct":[130],"a":[131,146,205],"similarity":[132],"metric":[133],"between":[134],"two":[136],"get":[138],"answers\u2019":[140,157],"similarity-matching":[141,158],"output.":[142],"Moreover,":[143,186],"learn":[145],"set":[147],"parameters":[149],"fuse":[151],"with":[155],"output,":[159],"finally,":[161],"use":[163,170,188],"fused":[165],"prediction.":[168],"We":[169],"weighting":[172],"each":[174],"mitigate":[177],"priors":[180],"computing":[182],"loss":[184],"function.":[185],"masks":[190],"outputs.":[194],"Experimental":[195],"results":[196],"demonstrate":[197],"effectiveness":[199],"our":[201],"method,":[202],"achieves":[204],"state-of-the-art":[206],"62.20%":[209],"VQA-CP":[211],"v2.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-27T09:02:27.158192","created_date":"2025-10-10T00:00:00"}
