{"id":"https://openalex.org/W4413994402","doi":"https://doi.org/10.32604/cmc.2025.067103","title":"Robust Audio-Visual Fusion for Emotion Recognition Based on Cross-Modal Learning under Noisy Conditions","display_name":"Robust Audio-Visual Fusion for Emotion Recognition Based on Cross-Modal Learning under Noisy Conditions","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4413994402","doi":"https://doi.org/10.32604/cmc.2025.067103"},"language":"en","primary_location":{"id":"doi:10.32604/cmc.2025.067103","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.067103","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.32604/cmc.2025.067103","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084068555","display_name":"A-Seong Moon","orcid":"https://orcid.org/0000-0001-9880-1401"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"A-Seong Moon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054402728","display_name":"Seungyeon Jeong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seungyeon Jeong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100414807","display_name":"Dong-Hee Kim","orcid":"https://orcid.org/0000-0002-9929-3864"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Donghee Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054106321","display_name":"Mohd Asyraf Zulkifley","orcid":"https://orcid.org/0000-0002-4010-3990"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mohd Asyraf Zulkifley","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064942426","display_name":"Bong-Soo Sohn","orcid":"https://orcid.org/0000-0003-4656-5659"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bong-Soo Sohn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100744039","display_name":"Jaesung Lee","orcid":"https://orcid.org/0000-0001-6474-1877"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaesung Lee","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084068555"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"85","issue":"2","first_page":"2851","last_page":"2872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8761000037193298,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8761000037193298,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8465999960899353,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13890","display_name":"Remote Sensing and Land Use","score":0.8453999757766724,"subfield":{"id":"https://openalex.org/subfields/1902","display_name":"Atmospheric Science"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.7952039241790771},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6668720841407776},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6261739730834961},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.581386387348175},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5662457346916199},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.524091899394989},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4487822353839874},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.16912785172462463},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0613921582698822}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.7952039241790771},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6668720841407776},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6261739730834961},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.581386387348175},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5662457346916199},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.524091899394989},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4487822353839874},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.16912785172462463},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0613921582698822},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.32604/cmc.2025.067103","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.067103","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.32604/cmc.2025.067103","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.067103","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2132555391","https://openalex.org/W2587982884","https://openalex.org/W2745497104","https://openalex.org/W2910165986","https://openalex.org/W2975019088","https://openalex.org/W3137028092","https://openalex.org/W3137646246","https://openalex.org/W3188766356","https://openalex.org/W4220887861","https://openalex.org/W4225726449","https://openalex.org/W4297499129","https://openalex.org/W4313591463","https://openalex.org/W4362721809","https://openalex.org/W4385989098","https://openalex.org/W4386615902","https://openalex.org/W4386826611","https://openalex.org/W4387573449","https://openalex.org/W4393397422","https://openalex.org/W4407994672"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3105646692","https://openalex.org/W4387914125","https://openalex.org/W3126677997","https://openalex.org/W1610857240"],"abstract_inverted_index":{"Emotion":[0],"recognition":[1,23],"under":[2,149],"uncontrolled":[3],"and":[4,40,70,80,87,96,183],"noisy":[5],"environments":[6],"presents":[7],"persistent":[8],"challenges":[9],"in":[10,127,165,187],"the":[11,91,124,157,160,188],"design":[12,182],"of":[13,123,151,159,190],"emotionally":[14],"responsive":[15],"systems.":[16],"The":[17,43,137,173,196],"current":[18],"study":[19,174],"introduces":[20],"an":[21],"audio-visual":[22],"framework":[24,45,92,138],"designed":[25],"to":[26,93,133,180],"address":[27],"performance":[28],"degradation":[29],"caused":[30],"by":[31,59],"environmental":[32],"interference,":[33],"such":[34],"as":[35,75],"background":[36],"noise,":[37],"overlapping":[38],"speech,":[39],"visual":[41,71],"obstructions.":[42],"proposed":[44,161],"employs":[46],"a":[47,120,177],"structured":[48],"fusion":[49,181],"approach,":[50],"combining":[51],"early-stage":[52],"feature-level":[53],"integration":[54],"with":[55],"decision-level":[56],"coordination":[57],"guided":[58],"temporal":[60,81],"attention":[61,104],"mechanisms.":[62],"Audio":[63],"data":[64,72],"are":[65,73,83],"transformed":[66],"into":[67],"mel-spectrogram":[68],"representations,":[69],"represented":[74],"raw":[76],"frame":[77],"sequences.":[78],"Spatial":[79],"features":[82],"extracted":[84],"through":[85],"convolutional":[86],"transformer-based":[88],"encoders,":[89],"allowing":[90],"capture":[94],"complementary":[95],"hierarchical":[97],"information":[98],"from":[99],"both":[100],"sources.":[101],"A":[102],"cross-modal":[103,152],"module":[105],"enables":[106],"selective":[107],"emphasis":[108],"on":[109,119,204],"relevant":[110],"signals":[111],"while":[112],"suppressing":[113],"modality-specific":[114],"noise.":[115],"Performance":[116],"is":[117,131,199],"validated":[118],"modified":[121],"version":[122],"AFEW":[125],"dataset,":[126],"which":[128],"controlled":[129,171],"noise":[130],"introduced":[132],"emulate":[134],"realistic":[135],"conditions.":[136],"achieves":[139],"higher":[140],"classification":[141],"accuracy":[142],"than":[143],"comparative":[144],"baselines,":[145],"confirming":[146],"increased":[147],"robustness":[148],"conditions":[150],"disruption.":[153],"This":[154],"result":[155],"demonstrates":[156],"suitability":[158],"method":[162],"for":[163],"deployment":[164],"practical":[166],"emotion-aware":[167],"technologies":[168],"operating":[169],"outside":[170],"environments.":[172],"also":[175],"contributes":[176],"systematic":[178],"approach":[179],"supports":[184],"further":[185],"exploration":[186],"direction":[189],"resilient":[191],"multimodal":[192],"emotion":[193],"analysis":[194],"frameworks.":[195],"source":[197],"code":[198],"publicly":[200],"available":[201],"at":[202],"(accessed":[203],"18":[205],"August":[206],"2025).":[207]},"counts_by_year":[],"updated_date":"2025-12-05T23:21:25.405358","created_date":"2025-10-10T00:00:00"}
