{"id":"https://openalex.org/W4413946288","doi":"https://doi.org/10.3390/computers14090361","title":"Prosodic Spatio-Temporal Feature Fusion with Attention Mechanisms for Speech Emotion Recognition","display_name":"Prosodic Spatio-Temporal Feature Fusion with Attention Mechanisms for Speech Emotion Recognition","publication_year":2025,"publication_date":"2025-08-31","ids":{"openalex":"https://openalex.org/W4413946288","doi":"https://doi.org/10.3390/computers14090361"},"language":"en","primary_location":{"id":"doi:10.3390/computers14090361","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14090361","pdf_url":"https://www.mdpi.com/2073-431X/14/9/361/pdf?version=1756631590","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2073-431X/14/9/361/pdf?version=1756631590","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006047636","display_name":"Kristiawan Nugroho","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101417","display_name":"Universitas Stikubank","ror":"https://ror.org/01zq89n20","country_code":"ID","type":"education","lineage":["https://openalex.org/I4210101417"]}],"countries":["ID"],"is_corresponding":true,"raw_author_name":"Kristiawan Nugroho","raw_affiliation_strings":["Department of Information Technology, Faculty of Information Technology and Industry, Universitas Stikubank, Semarang 50241, Indonesia"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Faculty of Information Technology and Industry, Universitas Stikubank, Semarang 50241, Indonesia","institution_ids":["https://openalex.org/I4210101417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027928435","display_name":"Imam Husni Al Amin","orcid":"https://orcid.org/0000-0003-4998-5456"},"institutions":[{"id":"https://openalex.org/I4210101417","display_name":"Universitas Stikubank","ror":"https://ror.org/01zq89n20","country_code":"ID","type":"education","lineage":["https://openalex.org/I4210101417"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Imam Husni Al Amin","raw_affiliation_strings":["Department of Industrial Engineering, Faculty of Information Technology and Industry, Universitas Stikubank, Semarang 50241, Indonesia"],"affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering, Faculty of Information Technology and Industry, Universitas Stikubank, Semarang 50241, Indonesia","institution_ids":["https://openalex.org/I4210101417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026531880","display_name":"Nina Anggraeni Noviasari","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159899","display_name":"Universitas Muhammadiyah Semarang","ror":"https://ror.org/05hra0856","country_code":"ID","type":"education","lineage":["https://openalex.org/I4210159899"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Nina Anggraeni Noviasari","raw_affiliation_strings":["Faculty of Medicine, Universitas Muhammadiyah Semarang, Semarang 50273, Indonesia"],"affiliations":[{"raw_affiliation_string":"Faculty of Medicine, Universitas Muhammadiyah Semarang, Semarang 50273, Indonesia","institution_ids":["https://openalex.org/I4210159899"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019714454","display_name":"De Rosal Ignatius Moses Setiadi","orcid":"https://orcid.org/0000-0001-6615-4457"},"institutions":[{"id":"https://openalex.org/I4210127958","display_name":"Universitas Dian Nuswantoro","ror":"https://ror.org/02csxcg02","country_code":"ID","type":"education","lineage":["https://openalex.org/I4210127958"]}],"countries":["ID"],"is_corresponding":true,"raw_author_name":"De Rosal Ignatius Moses Setiadi","raw_affiliation_strings":["Research Centre for Quantum Computing and Materials Informatics, Faculty of Computer Science, Universitas Dian Nuswantoro, Semarang 50131, Indonesia"],"affiliations":[{"raw_affiliation_string":"Research Centre for Quantum Computing and Materials Informatics, Faculty of Computer Science, Universitas Dian Nuswantoro, Semarang 50131, Indonesia","institution_ids":["https://openalex.org/I4210127958"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006047636","https://openalex.org/A5019714454"],"corresponding_institution_ids":["https://openalex.org/I4210101417","https://openalex.org/I4210127958"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":3.2654,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92770769,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"14","issue":"9","first_page":"361","last_page":"361"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.6918205618858337},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6268632411956787},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6106469035148621},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5568973422050476},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.41817545890808105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35586342215538025},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09054049849510193}],"concepts":[{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.6918205618858337},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6268632411956787},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6106469035148621},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5568973422050476},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.41817545890808105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35586342215538025},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09054049849510193},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/computers14090361","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14090361","pdf_url":"https://www.mdpi.com/2073-431X/14/9/361/pdf?version=1756631590","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:90f9178b2de246e9bb60929d2addc898","is_oa":true,"landing_page_url":"https://doaj.org/article/90f9178b2de246e9bb60929d2addc898","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computers, Vol 14, Iss 9, p 361 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computers14090361","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14090361","pdf_url":"https://www.mdpi.com/2073-431X/14/9/361/pdf?version=1756631590","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413946288.pdf","grobid_xml":"https://content.openalex.org/works/W4413946288.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W2560824463","https://openalex.org/W3120595804","https://openalex.org/W3153137765","https://openalex.org/W3184620678","https://openalex.org/W3209253786","https://openalex.org/W4200079780","https://openalex.org/W4205555939","https://openalex.org/W4213448838","https://openalex.org/W4223499953","https://openalex.org/W4223606270","https://openalex.org/W4226023758","https://openalex.org/W4226190360","https://openalex.org/W4280556124","https://openalex.org/W4283121045","https://openalex.org/W4283733223","https://openalex.org/W4283771248","https://openalex.org/W4296708560","https://openalex.org/W4382399592","https://openalex.org/W4383426413","https://openalex.org/W4386449513","https://openalex.org/W4391982840","https://openalex.org/W4391997162","https://openalex.org/W4393065972","https://openalex.org/W4393284319","https://openalex.org/W4399322270","https://openalex.org/W4401333176","https://openalex.org/W4401748363","https://openalex.org/W4402559305","https://openalex.org/W4403440069","https://openalex.org/W4405028185","https://openalex.org/W4406461749","https://openalex.org/W4407157560","https://openalex.org/W4409786620","https://openalex.org/W4410121549","https://openalex.org/W4410226327","https://openalex.org/W4410236540","https://openalex.org/W4410320317","https://openalex.org/W4411002313","https://openalex.org/W4411507887","https://openalex.org/W4411565457","https://openalex.org/W4412581350"],"related_works":["https://openalex.org/W3147584709","https://openalex.org/W2099421762","https://openalex.org/W2530546662","https://openalex.org/W2967030268","https://openalex.org/W2977677679","https://openalex.org/W2185253430","https://openalex.org/W1992327129","https://openalex.org/W4210345652","https://openalex.org/W3126677997","https://openalex.org/W1610857240"],"abstract_inverted_index":{"Speech":[0],"Emotion":[1],"Recognition":[2],"(SER)":[3],"plays":[4],"a":[5,44,138,163],"vital":[6],"role":[7],"in":[8,25],"supporting":[9],"applications":[10],"such":[11],"as":[12],"healthcare,":[13],"human\u2013computer":[14],"interaction,":[15],"and":[16,29,39,62,78,88,95,137,149,159],"security.":[17],"However,":[18],"many":[19],"existing":[20],"approaches":[21],"still":[22],"face":[23],"challenges":[24],"achieving":[26],"robust":[27],"generalization":[28],"maintaining":[30],"high":[31],"recall,":[32,158],"particularly":[33],"for":[34,166],"emotions":[35],"related":[36],"to":[37,104,114],"stress":[38],"anxiety.":[40],"This":[41],"study":[42],"proposes":[43],"dual-stream":[45],"hybrid":[46],"model":[47,113],"that":[48,146],"combines":[49],"prosodic":[50,148],"features":[51,90,152],"with":[52,93,128,153],"spatio-temporal":[53],"representations":[54],"derived":[55],"from":[56],"the":[57,63,107,112,117,125],"Multitaper":[58],"Mel-Frequency":[59],"Spectrogram":[60,66],"(MTMFS)":[61],"Constant-Q":[64],"Transform":[65],"(CQTS).":[67],"Prosodic":[68],"cues,":[69],"including":[70],"pitch,":[71],"intensity,":[72],"jitter,":[73],"shimmer,":[74],"HNR,":[75],"pause":[76],"rate,":[77,80],"speech":[79],"were":[81,91],"processed":[82],"using":[83],"dense":[84],"layers,":[85],"while":[86],"MTMFS":[87],"CQTS":[89],"encoded":[92],"CNN":[94],"BiGRU.":[96],"A":[97],"Multi-Head":[98],"Attention":[99],"mechanism":[100],"was":[101],"then":[102],"applied":[103],"adaptively":[105],"fuse":[106],"two":[108],"feature":[109],"streams,":[110],"allowing":[111],"focus":[115],"on":[116,124],"most":[118],"relevant":[119],"emotional":[120],"cues.":[121],"Evaluations":[122],"conducted":[123],"RAVDESS":[126],"dataset":[127],"subject-independent":[129],"5-fold":[130],"cross-validation":[131],"demonstrated":[132],"an":[133],"accuracy":[134],"of":[135,141],"97.64%":[136],"macro":[139],"F1-score":[140],"0.9745.":[142],"These":[143],"results":[144],"confirm":[145],"combining":[147],"advanced":[150],"spectrogram":[151],"attention-based":[154],"fusion":[155],"improves":[156],"precision,":[157],"overall":[160],"robustness,":[161],"offering":[162],"promising":[164],"framework":[165],"more":[167],"reliable":[168],"SER":[169],"systems.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
