{"id":"https://openalex.org/W3115819524","doi":"https://doi.org/10.1109/tmm.2020.3046855","title":"Focus Your Attention: A Focal Attention for Multimodal Learning","display_name":"Focus Your Attention: A Focal Attention for Multimodal Learning","publication_year":2020,"publication_date":"2020-12-23","ids":{"openalex":"https://openalex.org/W3115819524","doi":"https://doi.org/10.1109/tmm.2020.3046855","mag":"3115819524"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2020.3046855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.3046855","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100386372","display_name":"Chunxiao Liu","orcid":"https://orcid.org/0000-0002-3307-9558"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chunxiao Liu","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","School of Information Science and Technology, University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0002-3307-9558","affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023341829","display_name":"Zhendong Mao","orcid":"https://orcid.org/0000-0001-5739-8126"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Mao","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0001-5739-8126","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100648981","display_name":"Tianzhu Zhang","orcid":"https://orcid.org/0000-0003-1856-9564"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianzhu Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0003-1856-9564","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081485810","display_name":"An-An Liu","orcid":"https://orcid.org/0000-0001-5755-9145"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"An-An Liu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0001-5755-9145","affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015829772","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0001-9760-8343"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Wang","raw_affiliation_strings":["Xiaomi AI lab, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xiaomi AI lab, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-1151-1792","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100386372"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I4210156404","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.6868,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.7307348,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"24","issue":null,"first_page":"103","last_page":"115"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.839332640171051},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6572118997573853},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6542778015136719},{"id":"https://openalex.org/keywords/focal-point","display_name":"Focal point","score":0.5926177501678467},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5614771246910095},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.522774875164032},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5094752907752991},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.381896048784256},{"id":"https://openalex.org/keywords/cardinal-point","display_name":"Cardinal point","score":0.20261186361312866}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.839332640171051},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6572118997573853},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6542778015136719},{"id":"https://openalex.org/C2779433544","wikidata":"https://www.wikidata.org/wiki/Q1435226","display_name":"Focal point","level":3,"score":0.5926177501678467},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5614771246910095},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.522774875164032},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5094752907752991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.381896048784256},{"id":"https://openalex.org/C138395690","wikidata":"https://www.wikidata.org/wiki/Q376733","display_name":"Cardinal point","level":2,"score":0.20261186361312866},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2020.3046855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.3046855","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5400000214576721,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2458909528","display_name":null,"funder_award_id":"WK3480000008","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5198023733","display_name":null,"funder_award_id":"2020YFB1406603","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7350247691","display_name":null,"funder_award_id":"61525206","funder_id":"https://openalex.org/F4320336125","funder_display_name":"National Science Fund for Distinguished Young Scholars"},{"id":"https://openalex.org/G8374609628","display_name":null,"funder_award_id":"U19A2057","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320336125","display_name":"National Science Fund for Distinguished Young Scholars","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1514535095","https://openalex.org/W1773149199","https://openalex.org/W1797268635","https://openalex.org/W1861492603","https://openalex.org/W1916445035","https://openalex.org/W2041062763","https://openalex.org/W2101298495","https://openalex.org/W2112912048","https://openalex.org/W2123024445","https://openalex.org/W2183341477","https://openalex.org/W2277195237","https://openalex.org/W2295151578","https://openalex.org/W2316082076","https://openalex.org/W2342543219","https://openalex.org/W2398118205","https://openalex.org/W2463565445","https://openalex.org/W2546696630","https://openalex.org/W2550553598","https://openalex.org/W2552579943","https://openalex.org/W2557449848","https://openalex.org/W2563399268","https://openalex.org/W2575842049","https://openalex.org/W2586573945","https://openalex.org/W2606473278","https://openalex.org/W2739107216","https://openalex.org/W2774267535","https://openalex.org/W2778100917","https://openalex.org/W2778940641","https://openalex.org/W2798786641","https://openalex.org/W2799062770","https://openalex.org/W2808877322","https://openalex.org/W2888166343","https://openalex.org/W2888975113","https://openalex.org/W2890718122","https://openalex.org/W2896442399","https://openalex.org/W2898403805","https://openalex.org/W2900953995","https://openalex.org/W2903838325","https://openalex.org/W2945260553","https://openalex.org/W2951464224","https://openalex.org/W2953016680","https://openalex.org/W2962964995","https://openalex.org/W2963143316","https://openalex.org/W2963163163","https://openalex.org/W2963467339","https://openalex.org/W2963521239","https://openalex.org/W2963722138","https://openalex.org/W2963882743","https://openalex.org/W2963954913","https://openalex.org/W2963966654","https://openalex.org/W2964024144","https://openalex.org/W2964081303","https://openalex.org/W2964120214","https://openalex.org/W2965289598","https://openalex.org/W2981963155","https://openalex.org/W2982078236","https://openalex.org/W3004349648","https://openalex.org/W4298289240","https://openalex.org/W4299522971","https://openalex.org/W4299801216","https://openalex.org/W6630875275","https://openalex.org/W6637568146","https://openalex.org/W6638319203","https://openalex.org/W6639102338","https://openalex.org/W6676647902","https://openalex.org/W6678470764","https://openalex.org/W6683074461","https://openalex.org/W6685183736","https://openalex.org/W6719057275","https://openalex.org/W6743714955","https://openalex.org/W6745554073","https://openalex.org/W6747225742","https://openalex.org/W6752083267","https://openalex.org/W6754048694","https://openalex.org/W6762122294"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2004831463","https://openalex.org/W2110287964","https://openalex.org/W2972976269","https://openalex.org/W3173727152"],"abstract_inverted_index":{"The":[0,70,99],"key":[1],"point":[2],"in":[3,29],"multimodal":[4,191],"learning":[5,32],"is":[6],"to":[7,36,93,104,116],"learn":[8,94],"semantic":[9,30,76,97,178,195],"alignment":[10,31,77,179],"that":[11,130,158,193,234],"finds":[12],"the":[13,53,75,128,135,138,152,182,199,202,235],"correspondence":[14,64],"between":[15],"sub-elements":[16,39,54,72,132,160],"of":[17,60,107,184,201,223],"instances":[18],"from":[19,55,123],"different":[20,41],"modality":[21,154],"data.":[22],"Attention":[23],"mechanism":[24,92],"has":[25],"shown":[26],"its":[27],"power":[28],"as":[33,112],"it":[34,50,188],"enables":[35],"densely":[37],"associate":[38],"across":[40],"modalities.":[42],"However,":[43],"for":[44,226],"each":[45,121],"sub-element,":[46],"existing":[47],"attention":[48,91,101,225,237],"aligns":[49],"with":[51,65],"all":[52],"another":[56,124],"modality,":[57],"while":[58,167],"most":[59],"them":[61],"have":[62],"no":[63],"it,":[66],"i.e.":[67],"irrelevant":[68,71,146,168,185],"sub-elements.":[69],"will":[73],"distract":[74],"if":[78],"they":[79],"are":[80,110],"also":[81],"attended.":[82],"In":[83],"this":[84],"paper,":[85],"we":[86,205,216],"propose":[87,217],"a":[88,105,174,218],"novel":[89],"focal":[90,100,203,224,236],"more":[95],"accurate":[96],"alignment.":[98,196],"sparsely":[102],"attends":[103],"subset":[106],"sub-elements,":[108,186],"which":[109],"identified":[111],"relevant":[113,131,144,159],"ones":[114,147,169],"according":[115],"their":[117],"posterior":[118,139,165],"probabilities":[119],"given":[120],"sub-element":[122],"modality.":[125],"Based":[126],"on":[127,209,231],"observation":[129],"mostly":[133],"describe":[134],"same":[136,153],"semantic,":[137],"probability":[140],"can":[141,238],"precisely":[142],"distinguish":[143],"and":[145,163,187,212,215,220,240],"by":[148,180],"taking":[149],"interactions":[150],"within":[151],"into":[155],"consideration,":[156],"such":[157],"get":[161,170],"higher":[162],"closer":[164],"probabilities,":[166],"lower":[171],"probabilities.":[172],"Such":[173],"design":[175],"learns":[176],"better":[177],"preventing":[181],"interference":[183],"facilitates":[189],"subsequent":[190],"tasks":[192],"demand":[194],"To":[197],"validate":[198],"effectiveness":[200],"attention,":[204],"conduct":[206],"extensive":[207],"experiments":[208],"image-text":[210],"matching":[211],"text-to-image":[213],"generation,":[214],"bidirectional":[219],"stacked":[221],"version":[222],"them,":[227],"respectively.":[228],"Experimental":[229],"results":[230],"benchmarks":[232],"show":[233],"significantly":[239],"consistently":[241],"outperform":[242],"state-of-the-arts.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
