{"id":"https://openalex.org/W3198570286","doi":"https://doi.org/10.1145/3460426.3463584","title":"Cross-Modal Self-Attention with Multi-Task Pre-Training for Medical Visual Question Answering","display_name":"Cross-Modal Self-Attention with Multi-Task Pre-Training for Medical Visual Question Answering","publication_year":2021,"publication_date":"2021-08-24","ids":{"openalex":"https://openalex.org/W3198570286","doi":"https://doi.org/10.1145/3460426.3463584","mag":"3198570286"},"language":"en","primary_location":{"id":"doi:10.1145/3460426.3463584","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463584","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075975483","display_name":"Haifan Gong","orcid":"https://orcid.org/0000-0002-2749-6830"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haifan Gong","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058818348","display_name":"Guanqi Chen","orcid":"https://orcid.org/0000-0002-1440-3340"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanqi Chen","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033677670","display_name":"Sishuo Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Sishuo Liu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108557359","display_name":"Yizhou Yu","orcid":"https://orcid.org/0000-0002-0470-5548"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Yizhou Yu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042965510","display_name":"Guanbin Li","orcid":"https://orcid.org/0000-0002-4805-0926"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbin Li","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075975483"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":6.2955,"has_fulltext":false,"cited_by_count":95,"citation_normalized_percentile":{"value":0.97505757,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"456","last_page":"460"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8231646418571472},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6238270401954651},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6075313091278076},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4970147907733917},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4904308021068573},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4897036850452423},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48843351006507874},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.46954473853111267},{"id":"https://openalex.org/keywords/witness","display_name":"Witness","score":0.4290751814842224},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.41877514123916626},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33289045095443726}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8231646418571472},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6238270401954651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6075313091278076},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4970147907733917},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4904308021068573},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4897036850452423},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48843351006507874},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.46954473853111267},{"id":"https://openalex.org/C2776900844","wikidata":"https://www.wikidata.org/wiki/Q8028383","display_name":"Witness","level":2,"score":0.4290751814842224},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.41877514123916626},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33289045095443726},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3460426.3463584","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463584","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6800000071525574}],"awards":[{"id":"https://openalex.org/G1874218645","display_name":null,"funder_award_id":"61976250","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2595551253","https://openalex.org/W2745461083","https://openalex.org/W2889187377","https://openalex.org/W2911489562","https://openalex.org/W2963091558","https://openalex.org/W2963150162","https://openalex.org/W2963383024","https://openalex.org/W2963403868","https://openalex.org/W2963466845","https://openalex.org/W2963954913","https://openalex.org/W2980088508","https://openalex.org/W3011651912","https://openalex.org/W3011797839","https://openalex.org/W3094950914","https://openalex.org/W3096924377","https://openalex.org/W3100297028","https://openalex.org/W6902002720"],"related_works":["https://openalex.org/W2890518300","https://openalex.org/W2366687089","https://openalex.org/W588843504","https://openalex.org/W2905271011","https://openalex.org/W3164948662","https://openalex.org/W4289536128","https://openalex.org/W2440023763","https://openalex.org/W2962474440","https://openalex.org/W3153597579","https://openalex.org/W4298151006"],"abstract_inverted_index":{"Due":[0],"to":[1,20,35,78,99],"the":[2,49,54,82,87,102,119],"severe":[3],"lack":[4],"of":[5,10,30,53,84,110],"labeled":[6],"data,":[7],"existing":[8,123],"methods":[9],"medical":[11],"visual":[12,31,111],"question":[13],"answering":[14],"usually":[15],"rely":[16],"on":[17],"transfer":[18],"learning":[19,69],"obtain":[21],"effective":[22,108],"image":[23,63,89],"feature":[24,64],"representation":[25],"and":[26,32,46,51,71,112,128],"use":[27],"cross-modal":[28,58,96],"fusion":[29,109],"linguistic":[33,113],"features":[34,56,85],"achieve":[36],"question-related":[37],"answer":[38],"prediction.":[39],"These":[40],"two":[41],"phases":[42],"are":[43,130],"performed":[44],"independently":[45],"without":[47],"considering":[48],"compatibility":[50],"applicability":[52,83],"pre-trained":[55],"for":[57,86,106],"fusion.":[59],"Thus,":[60],"we":[61,93],"reformulate":[62],"pre-training":[65],"as":[66],"a":[67,95],"multi-task":[68],"paradigm":[70],"witness":[72],"its":[73],"extraordinary":[74],"superiority,":[75],"forcing":[76],"it":[77],"take":[79],"into":[80],"account":[81],"specific":[88],"comprehension":[90],"task.":[91],"Furthermore,":[92],"introduce":[94],"self-attention~(CMSA)":[97],"module":[98],"selectively":[100],"capture":[101],"long-range":[103],"contextual":[104],"relevance":[105],"more":[107],"features.":[114],"Experimental":[115],"results":[116],"demonstrate":[117],"that":[118],"proposed":[120],"method":[121],"outperforms":[122],"state-of-the-art":[124],"methods.":[125],"Our":[126],"code":[127],"models":[129],"available":[131],"at":[132],"https://github.com/haifangong/CMSA-MTPT-4-MedicalVQA.":[133]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":27},{"year":2024,"cited_by_count":20},{"year":2023,"cited_by_count":29},{"year":2022,"cited_by_count":16}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
