{"id":"https://openalex.org/W4403024442","doi":"https://doi.org/10.1109/pacrim61180.2024.10690221","title":"Cross-Attention Dual-Stream Fusion for Speech Emotion Recognition","display_name":"Cross-Attention Dual-Stream Fusion for Speech Emotion Recognition","publication_year":2024,"publication_date":"2024-08-21","ids":{"openalex":"https://openalex.org/W4403024442","doi":"https://doi.org/10.1109/pacrim61180.2024.10690221"},"language":"en","primary_location":{"id":"doi:10.1109/pacrim61180.2024.10690221","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pacrim61180.2024.10690221","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Pacific Rim Conference on Communications, Computers and Signal Processing (PACRIM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071143244","display_name":"Shaode Yu","orcid":"https://orcid.org/0000-0002-3412-2159"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shaode Yu","raw_affiliation_strings":["School of Information and Communication Engineering, Communication University of China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Communication University of China,Beijing,China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040257028","display_name":"Jiajian Meng","orcid":"https://orcid.org/0009-0000-5448-634X"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiajian Meng","raw_affiliation_strings":["School of Information and Communication Engineering, Communication University of China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Communication University of China,Beijing,China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025916705","display_name":"Bing Zhu","orcid":"https://orcid.org/0000-0002-6068-4040"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bing Zhu","raw_affiliation_strings":["School of Information and Communication Engineering, Communication University of China,Beijing,China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Communication University of China,Beijing,China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100868001","display_name":"Qiurui Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiurui Sun","raw_affiliation_strings":["Beijing Normal University,Center of Information &#x0026; Network Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing Normal University,Center of Information &#x0026; Network Technology,Beijing,China","institution_ids":["https://openalex.org/I25254941"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071143244"],"corresponding_institution_ids":["https://openalex.org/I75689368"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.187065,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9309999942779541,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9309999942779541,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6503150463104248},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6220336556434631},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6036434173583984},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5828931331634521},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5425939559936523},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32058364152908325},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07240104675292969}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6503150463104248},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6220336556434631},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6036434173583984},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5828931331634521},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5425939559936523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32058364152908325},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07240104675292969},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pacrim61180.2024.10690221","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pacrim61180.2024.10690221","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Pacific Rim Conference on Communications, Computers and Signal Processing (PACRIM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2146334809","https://openalex.org/W2194775991","https://openalex.org/W2265846598","https://openalex.org/W2408520939","https://openalex.org/W2625297138","https://openalex.org/W2740721704","https://openalex.org/W2760103357","https://openalex.org/W2766272105","https://openalex.org/W2803098682","https://openalex.org/W2885005742","https://openalex.org/W2889374687","https://openalex.org/W2936113082","https://openalex.org/W2937154351","https://openalex.org/W2973181312","https://openalex.org/W3015489952","https://openalex.org/W3015969913","https://openalex.org/W3094805191","https://openalex.org/W3160039712","https://openalex.org/W3166762187","https://openalex.org/W3197156295","https://openalex.org/W3197580070","https://openalex.org/W3197642003","https://openalex.org/W3209059054","https://openalex.org/W4220887861","https://openalex.org/W4221089191","https://openalex.org/W4221162872","https://openalex.org/W4286436501","https://openalex.org/W4313887688","https://openalex.org/W4317380951","https://openalex.org/W4372260013","https://openalex.org/W4375868941","https://openalex.org/W4375869379","https://openalex.org/W4379033883","https://openalex.org/W4386158972","https://openalex.org/W4388837981","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2317351040","https://openalex.org/W2952466936","https://openalex.org/W1988622314","https://openalex.org/W2393949104","https://openalex.org/W2099421762","https://openalex.org/W3046201198","https://openalex.org/W2530546662","https://openalex.org/W2515288625","https://openalex.org/W3126677997","https://openalex.org/W1610857240"],"abstract_inverted_index":{"Speech":[0],"emotion":[1,29,67,141],"recognition":[2],"aims":[3],"for":[4,66,75],"recognizing":[5],"human":[6],"subjective":[7],"emotions":[8,86],"through":[9],"in-depth":[10],"audio":[11,64],"signal":[12],"analysis.":[13],"It":[14],"benefits":[15],"a":[16],"wide":[17],"range":[18],"of":[19,52,59,78,84,94],"downstream":[20],"applications":[21],"and":[22,31,57,69,90,105,108,117,143,158],"tasks.":[23],"However,":[24],"how":[25,32],"to":[26,33],"comprehensively":[27],"encode":[28],"cues":[30],"effectively":[34],"fuse":[35],"implicit":[36],"information":[37],"still":[38],"remains":[39],"challenging.":[40],"To":[41],"address":[42],"the":[43,82,92,95,123,137,144,147,154,164],"issues,":[44],"dual-stream":[45,79,102,138],"representation":[46,157],"is":[47,73,97],"designed":[48],"that":[49,122],"uses":[50],"full-training":[51],"TextRCNN":[53],"from":[54,62],"log-Mel":[55],"spectrograms":[56],"fine-tuning":[58],"pre-trained":[60,106],"HuBERT":[61],"raw":[63],"signals":[65],"encoding,":[68],"cross-attention":[70],"fusion":[71,110,161],"(CAF)":[72],"developed":[74],"valid":[76],"integration":[77],"outputs.":[80],"On":[81],"IEMOCAP":[83],"four":[85],"(\u201chappy+excited\u201d,":[87],"\u201csad\u201d,":[88],"\u201cangry\u201d,":[89],"\u201cneutral\u201d),":[91],"effectiveness":[93],"framework":[96,125],"evaluated":[98],"by":[99],"using":[100],"different":[101,109],"encoders":[103],"(DPCNN":[104],"Wave2Vec2)":[107],"modules":[111],"(summation,":[112],"concatenation,":[113],"feature-wise":[114],"linear":[115],"modulation,":[116],"CAF).":[118],"Experimental":[119],"results":[120],"reveal":[121],"proposed":[124],"achieves":[126],"promising":[127],"performance":[128],"(weighted":[129],"accuracy,":[130,133],"0.6965;":[131],"unweighted":[132],"0.7088;":[134],"F1-score,":[135],"0.7084),":[136],"encoder":[139],"enriches":[140],"representation,":[142],"CAF":[145],"outperforms":[146],"other":[148],"three":[149],"modules.":[150],"For":[151],"further":[152],"improving":[153],"performance,":[155],"multi-stream":[156],"multi-branch":[159],"feature":[160],"would":[162],"be":[163,170],"research":[165],"direction.":[166],"Our":[167],"code":[168],"will":[169],"available":[171],"on":[172],"GitHub":[173],"(https://github.com/NicoYuCN/serCAF).":[174]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
