{"id":"https://openalex.org/W4307640249","doi":"https://doi.org/10.1145/3545572","title":"A Review on Methods and Applications in Multimodal Deep Learning","display_name":"A Review on Methods and Applications in Multimodal Deep Learning","publication_year":2022,"publication_date":"2022-10-27","ids":{"openalex":"https://openalex.org/W4307640249","doi":"https://doi.org/10.1145/3545572"},"language":"en","primary_location":{"id":"doi:10.1145/3545572","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3545572","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079163045","display_name":"Summaira Jabeen","orcid":"https://orcid.org/0000-0002-0655-8414"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Summaira Jabeen","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407758","display_name":"Xi Li","orcid":"https://orcid.org/0000-0003-3023-1662"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Li","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063978929","display_name":"Muhammad Shoib Amin","orcid":"https://orcid.org/0000-0001-9046-1502"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muhammad Shoib Amin","raw_affiliation_strings":["School of Software Engineering, East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"School of Software Engineering, East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042547419","display_name":"Omar El Farouk Bourahla","orcid":"https://orcid.org/0000-0002-7410-3825"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Omar Bourahla","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101818877","display_name":"Songyuan Li","orcid":"https://orcid.org/0000-0003-4052-1006"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songyuan Li","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101728852","display_name":"Abdul Jabbar","orcid":"https://orcid.org/0000-0002-0309-1476"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Abdul Jabbar","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5079163045"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":14.6049,"has_fulltext":false,"cited_by_count":161,"citation_normalized_percentile":{"value":0.99446231,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"19","issue":"2s","first_page":"1","last_page":"41"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.85691899061203},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.7705523371696472},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.688064694404602},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.6555567979812622},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6394056081771851},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.5492338538169861},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39901164174079895},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3545230031013489}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85691899061203},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.7705523371696472},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.688064694404602},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.6555567979812622},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6394056081771851},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.5492338538169861},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39901164174079895},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3545230031013489},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3545572","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3545572","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5299999713897705,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1820810521","display_name":null,"funder_award_id":"62225605","funder_id":"https://openalex.org/F4320336125","funder_display_name":"National Science Fund for Distinguished Young Scholars"},{"id":"https://openalex.org/G8778414158","display_name":null,"funder_award_id":"U20A20222","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320318398","display_name":"Ant Group","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336125","display_name":"National Science Fund for Distinguished Young Scholars","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":127,"referenced_works":["https://openalex.org/W6665699","https://openalex.org/W68733909","https://openalex.org/W102708294","https://openalex.org/W1516807289","https://openalex.org/W1572567476","https://openalex.org/W1575833922","https://openalex.org/W1596841185","https://openalex.org/W1607035479","https://openalex.org/W1786709202","https://openalex.org/W1889081078","https://openalex.org/W1974244877","https://openalex.org/W2015394094","https://openalex.org/W2016089260","https://openalex.org/W2021712846","https://openalex.org/W2026512742","https://openalex.org/W2053101950","https://openalex.org/W2081580037","https://openalex.org/W2083897630","https://openalex.org/W2085419064","https://openalex.org/W2109586012","https://openalex.org/W2111078031","https://openalex.org/W2119288237","https://openalex.org/W2120127208","https://openalex.org/W2132555391","https://openalex.org/W2146334809","https://openalex.org/W2151096985","https://openalex.org/W2151498684","https://openalex.org/W2168692779","https://openalex.org/W2185175083","https://openalex.org/W2186222003","https://openalex.org/W2277195237","https://openalex.org/W2342475039","https://openalex.org/W2496096353","https://openalex.org/W2583695460","https://openalex.org/W2587648059","https://openalex.org/W2594962660","https://openalex.org/W2607303097","https://openalex.org/W2619368999","https://openalex.org/W2619383789","https://openalex.org/W2746646173","https://openalex.org/W2754689878","https://openalex.org/W2755014019","https://openalex.org/W2766046458","https://openalex.org/W2766261529","https://openalex.org/W2766557690","https://openalex.org/W2767290858","https://openalex.org/W2785768523","https://openalex.org/W2788357188","https://openalex.org/W2792995953","https://openalex.org/W2883430806","https://openalex.org/W2892816441","https://openalex.org/W2906314281","https://openalex.org/W2909624725","https://openalex.org/W2909814607","https://openalex.org/W2911542843","https://openalex.org/W2912150279","https://openalex.org/W2915289525","https://openalex.org/W2946165673","https://openalex.org/W2950579554","https://openalex.org/W2963177331","https://openalex.org/W2963609956","https://openalex.org/W2963691546","https://openalex.org/W2964138343","https://openalex.org/W2964303913","https://openalex.org/W2965818302","https://openalex.org/W2976260559","https://openalex.org/W2985076077","https://openalex.org/W2996202528","https://openalex.org/W2996298259","https://openalex.org/W2996421194","https://openalex.org/W3000702891","https://openalex.org/W3003861054","https://openalex.org/W3004442222","https://openalex.org/W3007589762","https://openalex.org/W3011727199","https://openalex.org/W3012111773","https://openalex.org/W3012155113","https://openalex.org/W3015928967","https://openalex.org/W3027131706","https://openalex.org/W3034144331","https://openalex.org/W3034464851","https://openalex.org/W3036848992","https://openalex.org/W3036900224","https://openalex.org/W3039905287","https://openalex.org/W3044175177","https://openalex.org/W3044285380","https://openalex.org/W3046090708","https://openalex.org/W3048631361","https://openalex.org/W3048707520","https://openalex.org/W3080477479","https://openalex.org/W3084624816","https://openalex.org/W3086814465","https://openalex.org/W3092364816","https://openalex.org/W3092853178","https://openalex.org/W3095191006","https://openalex.org/W3098351727","https://openalex.org/W3101767943","https://openalex.org/W3102692100","https://openalex.org/W3111490429","https://openalex.org/W3122451732","https://openalex.org/W3132101720","https://openalex.org/W3137069976","https://openalex.org/W3143835353","https://openalex.org/W3168970348","https://openalex.org/W3185066916","https://openalex.org/W3193504637","https://openalex.org/W3195949276","https://openalex.org/W3198196812","https://openalex.org/W4236965008","https://openalex.org/W4242473587","https://openalex.org/W4252331534","https://openalex.org/W4287813440","https://openalex.org/W4297689875","https://openalex.org/W4298274365","https://openalex.org/W4298392976","https://openalex.org/W4298857617","https://openalex.org/W4299296451","https://openalex.org/W4301409532","https://openalex.org/W4393782427","https://openalex.org/W6634232107","https://openalex.org/W6676577389","https://openalex.org/W6736996214","https://openalex.org/W6744214710","https://openalex.org/W6748588790","https://openalex.org/W6765987481","https://openalex.org/W6777324918","https://openalex.org/W6782759637"],"related_works":["https://openalex.org/W2199184806","https://openalex.org/W4389505417","https://openalex.org/W2962931510","https://openalex.org/W127837312","https://openalex.org/W2922283411","https://openalex.org/W4384789578","https://openalex.org/W4380551887","https://openalex.org/W4285159263","https://openalex.org/W4387421317","https://openalex.org/W2904518532"],"abstract_inverted_index":{"Deep":[0],"Learning":[1],"has":[2,10,126],"implemented":[3],"a":[4],"wide":[5],"range":[6],"of":[7,19,52,71,79,102,110,132],"applications":[8,125,143],"and":[9,31,60,97,106],"become":[11],"increasingly":[12],"popular":[13],"in":[14,68,121,144],"recent":[15,111],"years.":[16],"The":[17],"goal":[18],"multimodal":[20,122,134],"deep":[21,123,135],"learning":[22,56,124,136],"(MMDL)":[23],"is":[24,138],"to":[25,58,119],"create":[26],"models":[27],"that":[28],"can":[29],"process":[30],"link":[32],"information":[33],"using":[34],"various":[35,64,133],"modalities.":[36],"Despite":[37],"the":[38,50,69,103,114],"extensive":[39],"development":[40],"made":[41],"for":[42,153],"unimodal":[43],"learning,":[44],"it":[45],"still":[46],"cannot":[47],"cover":[48],"all":[49],"aspects":[51],"human":[53],"learning.":[54],"Multimodal":[55],"helps":[57],"understand":[59],"analyze":[61],"better":[62],"when":[63],"senses":[65],"are":[66,150],"engaged":[67],"processing":[70],"information.":[72],"This":[73],"article":[74],"focuses":[75],"on":[76,141],"multiple":[77],"types":[78],"modalities,":[80],"i.e.,":[81],"image,":[82],"video,":[83],"text,":[84],"audio,":[85],"body":[86],"gestures,":[87],"facial":[88],"expressions,":[89],"physiological":[90],"signals,":[91],"flow,":[92],"RGB,":[93],"pose,":[94],"depth,":[95],"mesh,":[96],"point":[98],"cloud.":[99],"Detailed":[100],"analysis":[101],"baseline":[104],"approaches":[105],"an":[107],"in-depth":[108],"study":[109],"advancements":[112],"during":[113],"past":[115],"five":[116],"years":[117],"(2017":[118],"2021)":[120],"been":[127],"provided.":[128],"A":[129],"fine-grained":[130],"taxonomy":[131],"methods":[137],"proposed,":[139],"elaborating":[140],"different":[142],"more":[145],"depth.":[146],"Last,":[147],"main":[148],"issues":[149],"highlighted":[151],"separately":[152],"each":[154],"domain,":[155],"along":[156],"with":[157],"their":[158],"possible":[159],"future":[160],"research":[161],"directions.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":18},{"year":2025,"cited_by_count":72},{"year":2024,"cited_by_count":47},{"year":2023,"cited_by_count":24}],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2025-10-10T00:00:00"}
