{"id":"https://openalex.org/W7154401703","doi":"https://doi.org/10.1007/s00371-026-04475-1","title":"Advancing multimodal emotion analysis: a hybrid deep learning approach with intermediate fusion and multi-task learning","display_name":"Advancing multimodal emotion analysis: a hybrid deep learning approach with intermediate fusion and multi-task learning","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7154401703","doi":"https://doi.org/10.1007/s00371-026-04475-1"},"language":"en","primary_location":{"id":"doi:10.1007/s00371-026-04475-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00371-026-04475-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00371-026-04475-1.pdf","source":{"id":"https://openalex.org/S73060445","display_name":"The Visual Computer","issn_l":"0178-2789","issn":["0178-2789","1432-2315"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Visual Computer","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00371-026-04475-1.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102301863","display_name":"Fatih Kaya","orcid":null},"institutions":[{"id":"https://openalex.org/I6573832","display_name":"Turgut \u00d6zal University","ror":"https://ror.org/03r7b1f79","country_code":"TR","type":"education","lineage":["https://openalex.org/I6573832"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Fatih Kaya","raw_affiliation_strings":["Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey"],"raw_orcid":"https://orcid.org/0009-0003-6621-9240","affiliations":[{"raw_affiliation_string":"Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey","institution_ids":["https://openalex.org/I6573832"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008889214","display_name":"Yunus Emre Karaca","orcid":"https://orcid.org/0000-0002-9398-084X"},"institutions":[{"id":"https://openalex.org/I6573832","display_name":"Turgut \u00d6zal University","ror":"https://ror.org/03r7b1f79","country_code":"TR","type":"education","lineage":["https://openalex.org/I6573832"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Yunus Emre Karaca","raw_affiliation_strings":["Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey"],"raw_orcid":"https://orcid.org/0000-0002-9398-084X","affiliations":[{"raw_affiliation_string":"Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey","institution_ids":["https://openalex.org/I6573832"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030583080","display_name":"Serpil Aslan","orcid":"https://orcid.org/0000-0001-8009-063X"},"institutions":[{"id":"https://openalex.org/I6573832","display_name":"Turgut \u00d6zal University","ror":"https://ror.org/03r7b1f79","country_code":"TR","type":"education","lineage":["https://openalex.org/I6573832"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Serpil Aslan","raw_affiliation_strings":["Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey"],"raw_orcid":"https://orcid.org/0000-0001-8009-063X","affiliations":[{"raw_affiliation_string":"Software Engineering Department, Malatya Turgut Ozal University, Malatya, Turkey","institution_ids":["https://openalex.org/I6573832"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5133561143","display_name":"Muhammed Y\u0131ld\u0131r\u0131m","orcid":null},"institutions":[{"id":"https://openalex.org/I143396566","display_name":"F\u0131rat University","ror":"https://ror.org/05teb7b63","country_code":"TR","type":"education","lineage":["https://openalex.org/I143396566"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Muhammed Y\u0131ld\u0131r\u0131m","raw_affiliation_strings":["Department of Artificial Intelligence and Data Engineering, Firat University, Elazig, 23119, T\u00fcrkiye"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence and Data Engineering, Firat University, Elazig, 23119, T\u00fcrkiye","institution_ids":["https://openalex.org/I143396566"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5030583080"],"corresponding_institution_ids":["https://openalex.org/I6573832"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.6559769,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"42","issue":"6","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.4368000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.4368000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.3513000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.010900000110268593,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.7581999897956848},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6351000070571899},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5888000130653381},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4478999972343445},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4438000023365021},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.4050000011920929},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.391400009393692},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics","score":0.39089998602867126}],"concepts":[{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.7581999897956848},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7269999980926514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7134000062942505},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6351000070571899},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5888000130653381},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4478999972343445},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4059999883174896},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.4050000011920929},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C77660652","wikidata":"https://www.wikidata.org/wiki/Q150971","display_name":"Computer graphics","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C6438553","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Affective computing","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3379000127315521},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32330000400543213},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2971999943256378},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.26809999346733093}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00371-026-04475-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00371-026-04475-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00371-026-04475-1.pdf","source":{"id":"https://openalex.org/S73060445","display_name":"The Visual Computer","issn_l":"0178-2789","issn":["0178-2789","1432-2315"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Visual Computer","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00371-026-04475-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00371-026-04475-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00371-026-04475-1.pdf","source":{"id":"https://openalex.org/S73060445","display_name":"The Visual Computer","issn_l":"0178-2789","issn":["0178-2789","1432-2315"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Visual Computer","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4069375693798065,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7154401703.pdf","grobid_xml":"https://content.openalex.org/works/W7154401703.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1965343349","https://openalex.org/W2003856922","https://openalex.org/W2048835603","https://openalex.org/W2075456404","https://openalex.org/W2254214813","https://openalex.org/W2265228180","https://openalex.org/W2584561145","https://openalex.org/W2590924698","https://openalex.org/W2913340405","https://openalex.org/W2946165673","https://openalex.org/W2958165250","https://openalex.org/W2965373594","https://openalex.org/W2998884477","https://openalex.org/W3111911126","https://openalex.org/W3209862954","https://openalex.org/W4225650823","https://openalex.org/W4226143637","https://openalex.org/W4242175757","https://openalex.org/W4292408695","https://openalex.org/W4297499129","https://openalex.org/W4306152456","https://openalex.org/W4313558932","https://openalex.org/W4322743615","https://openalex.org/W4361010055","https://openalex.org/W4377234272","https://openalex.org/W4381485879","https://openalex.org/W4385145829","https://openalex.org/W4385494537","https://openalex.org/W4387807040","https://openalex.org/W4388004032","https://openalex.org/W4388342507","https://openalex.org/W4390430684","https://openalex.org/W4391566182","https://openalex.org/W4391692403","https://openalex.org/W4392138877","https://openalex.org/W4392161509","https://openalex.org/W4395026787","https://openalex.org/W4396610936","https://openalex.org/W4399387814","https://openalex.org/W4401500823","https://openalex.org/W4405255170","https://openalex.org/W4406480601","https://openalex.org/W4406559819","https://openalex.org/W4407144615","https://openalex.org/W4408993183","https://openalex.org/W4410693039","https://openalex.org/W4410775193","https://openalex.org/W4411336748","https://openalex.org/W4411504396","https://openalex.org/W4412630015","https://openalex.org/W4413119630"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Emotion":[1],"analysis":[2,164],"is":[3],"a":[4,59],"critical":[5],"research":[6,168],"domain":[7],"focused":[8],"on":[9,119],"detecting":[10],"the":[11,100,120,124,128,149,159],"emotional":[12],"states":[13],"of":[14,127,161],"individuals":[15],"or":[16],"communities":[17],"across":[18],"multiple":[19],"data":[20],"modalities,":[21],"including":[22],"text,":[23],"images,":[24],"and":[25,52,67,75,85,95,112,138,153,169],"audio.":[26],"While":[27],"substantial":[28],"progress":[29],"has":[30],"been":[31],"made":[32],"in":[33,165],"unimodal":[34],"(text-based)":[35],"sentiment":[36],"analysis,":[37],"real-world":[38,170],"scenarios":[39],"often":[40],"involve":[41],"multimodal":[42,108,144,162],"data,":[43],"making":[44],"integrated":[45],"approaches":[46],"essential":[47],"for":[48,157],"capturing":[49],"contextual":[50],"richness":[51],"improving":[53],"predictive":[54],"accuracy.":[55],"This":[56],"study":[57],"introduces":[58],"hybrid":[60],"deep":[61],"learning":[62,77],"model":[63],"that":[64],"combines":[65],"text":[66],"visual":[68,89],"features":[69],"through":[70,93],"an":[71],"intermediate":[72],"fusion":[73],"mechanism":[74],"multi-task":[76],"framework.":[78],"Textual":[79],"inputs":[80,90],"are":[81,91],"processed":[82],"using":[83],"RoBERTa":[84],"BiGRU":[86],"layers,":[87],"while":[88],"analyzed":[92],"ViT":[94],"ResNet50":[96],"architectures":[97],"enhanced":[98],"by":[99],"Convolutional":[101],"Block":[102],"Attention":[103],"Module":[104],"(CBAM).":[105],"The":[106],"fused":[107],"representations":[109],"enable":[110],"simultaneous":[111],"more":[113],"robust":[114],"emotion":[115,163],"classification.":[116],"Experimental":[117],"results":[118],"MVSA":[121],"dataset":[122],"demonstrate":[123],"superior":[125],"performance":[126],"proposed":[129],"model,":[130],"achieving":[131],"96.02%":[132],"accuracy,":[133],"95.51%":[134],"precision,":[135],"94.07%":[136],"recall,":[137],"94.73%":[139],"F1-score,":[140],"outperforming":[141],"several":[142],"state-of-the-art":[143],"benchmarks.":[145],"These":[146],"findings":[147],"underscore":[148],"model\u2019s":[150],"methodological":[151],"contributions":[152],"its":[154],"strong":[155],"potential":[156],"advancing":[158],"field":[160],"both":[166],"academic":[167],"applications.":[171]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-04-15T00:00:00"}
