{"id":"https://openalex.org/W4416250398","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228425","title":"Efficient Bilinear Attention-based Fusion for Medical Visual Question Answering","display_name":"Efficient Bilinear Attention-based Fusion for Medical Visual Question Answering","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416250398","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228425"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228425","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100723116","display_name":"Zhilin Zhang","orcid":"https://orcid.org/0000-0001-5913-4685"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhilin Zhang","raw_affiliation_strings":["New York University,Tandon School of Engineering,New York,USA"],"affiliations":[{"raw_affiliation_string":"New York University,Tandon School of Engineering,New York,USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100440033","display_name":"Jie Wang","orcid":"https://orcid.org/0000-0001-9902-5723"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Wang","raw_affiliation_strings":["Tongji University,College of Electronic and Information Engineering,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Tongji University,College of Electronic and Information Engineering,Shanghai,China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033761861","display_name":"Zhanghao Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zhanghao Qin","raw_affiliation_strings":["Nanyang Technological University,School of Electrical and Electronic Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,School of Electrical and Electronic Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017353019","display_name":"Ruiqi Zhu","orcid":"https://orcid.org/0000-0001-6416-9647"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiqi Zhu","raw_affiliation_strings":["Tongji University,College of Electronic and Information Engineering,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Tongji University,College of Electronic and Information Engineering,Shanghai,China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050426623","display_name":"Xiaoliang Gong","orcid":"https://orcid.org/0009-0004-6007-2222"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoliang Gong","raw_affiliation_strings":["Tongji University,College of Electronic and Information Engineering,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Tongji University,College of Electronic and Information Engineering,Shanghai,China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100723116"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":1.428,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87250427,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.004699999932199717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.004699999932199717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bilinear-interpolation","display_name":"Bilinear interpolation","score":0.522599995136261},{"id":"https://openalex.org/keywords/orthogonality","display_name":"Orthogonality","score":0.5135999917984009},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.5023999810218811},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4912000000476837},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.4724000096321106},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.4449000060558319},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4268999993801117},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.3806000053882599}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6940000057220459},{"id":"https://openalex.org/C205203396","wikidata":"https://www.wikidata.org/wiki/Q612143","display_name":"Bilinear interpolation","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C17137986","wikidata":"https://www.wikidata.org/wiki/Q215067","display_name":"Orthogonality","level":2,"score":0.5135999917984009},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.5023999810218811},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4912000000476837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47699999809265137},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.4724000096321106},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4268999993801117},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4260999858379364},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.3806000053882599},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C2778971668","wikidata":"https://www.wikidata.org/wiki/Q5510284","display_name":"Fusion rules","level":4,"score":0.28619998693466187},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27970001101493835},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228425","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2064675550","https://openalex.org/W2194775991","https://openalex.org/W2901466771","https://openalex.org/W2911489562","https://openalex.org/W2963150162","https://openalex.org/W2963717374","https://openalex.org/W2963954913","https://openalex.org/W2966683369","https://openalex.org/W2970231061","https://openalex.org/W2979525559","https://openalex.org/W3082429341","https://openalex.org/W3094950914","https://openalex.org/W3165058054","https://openalex.org/W3203255640","https://openalex.org/W3203635737","https://openalex.org/W4312784228","https://openalex.org/W4379660316","https://openalex.org/W4386566421","https://openalex.org/W4387211738","https://openalex.org/W4405669265"],"related_works":[],"abstract_inverted_index":{"Medical":[0],"Visual":[1],"Question":[2],"Answering":[3],"(MedVQA)":[4],"has":[5,36],"attracted":[6],"growing":[7],"interest":[8],"at":[9],"the":[10,37,116],"intersection":[11],"of":[12,118,135],"medical":[13,25,177],"image":[14,178],"understanding":[15],"and":[16,27,43,89,108,138,149,165],"natural":[17],"language":[18],"processing":[19],"for":[20,175],"clinical":[21,33],"applications.":[22],"By":[23],"interpreting":[24],"images":[26],"providing":[28],"precise":[29],"answers":[30],"to":[31,39,94],"relevant":[32],"inquiries,":[34],"MedVQA":[35,159],"potential":[38],"support":[40],"diagnostic":[41],"decision-making":[42],"reduce":[44],"workload":[45],"across":[46],"various":[47],"fields":[48],"like":[49,122],"radiology.":[50],"While":[51],"recent":[52],"approaches":[53],"rely":[54],"heavily":[55],"on":[56,63,153,156],"unified":[57],"large":[58],"pre-trained":[59],"Visual-Language":[60],"Models,":[61],"research":[62],"more":[64],"efficient":[65],"fusion":[66,80,113,120],"mechanisms":[67],"remains":[68],"relatively":[69],"limited":[70],"in":[71],"this":[72,75],"domain.":[73],"In":[74],"paper,":[76],"we":[77],"introduce":[78],"a":[79,90,172],"model,":[81],"OMniBAN,":[82],"that":[83,128,168],"integrates":[84],"Orthogonality":[85],"loss,":[86],"Multi-head":[87],"attention,":[88],"Bilinear":[91],"Attention":[92],"Network":[93],"achieve":[95],"high":[96],"computational":[97,182],"efficiency":[98,164],"as":[99,101],"well":[100],"solid":[102],"performance.":[103],"We":[104],"conduct":[105],"comprehensive":[106],"experiments":[107],"demonstrate":[109],"how":[110],"bilinear":[111],"attention":[112],"can":[114],"approximate":[115],"performance":[117,148],"larger":[119],"models":[121],"cross-modal":[123],"Transformer.":[124],"Our":[125],"results":[126],"show":[127],"OMniBAN":[129,169],"requires":[130],"fewer":[131],"parameters":[132],"(approximately":[133,142],"2/3":[134],"Transformer-based":[136],"Co-Attention)":[137],"substantially":[139],"lower":[140],"FLOPs":[141],"1/4),":[143],"while":[144],"achieving":[145],"comparable":[146],"overall":[147],"even":[150],"slight":[151],"improvements":[152],"closed-ended":[154],"questions":[155],"two":[157],"key":[158],"benchmarks.":[160],"This":[161],"balance":[162],"between":[163],"accuracy":[166],"suggests":[167],"could":[170],"be":[171],"viable":[173],"option":[174],"real-world":[176],"question":[179],"answering,":[180],"where":[181],"resources":[183],"are":[184],"often":[185],"constrained.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
