{"id":"https://openalex.org/W4405306055","doi":"https://doi.org/10.1109/besc64747.2024.10780726","title":"Speech Emotion Recognition Using Mel Spectrogram HPCA and Variational Mode Decomposition","display_name":"Speech Emotion Recognition Using Mel Spectrogram HPCA and Variational Mode Decomposition","publication_year":2024,"publication_date":"2024-08-16","ids":{"openalex":"https://openalex.org/W4405306055","doi":"https://doi.org/10.1109/besc64747.2024.10780726"},"language":"en","primary_location":{"id":"doi:10.1109/besc64747.2024.10780726","is_oa":false,"landing_page_url":"https://doi.org/10.1109/besc64747.2024.10780726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 11th International Conference on Behavioural and Social Computing (BESC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046168577","display_name":"David Hason Rudd","orcid":"https://orcid.org/0000-0002-4507-5087"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"David Hason Rudd","raw_affiliation_strings":["The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039469208","display_name":"Xingyi Gao","orcid":"https://orcid.org/0009-0009-3468-4125"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xingyi Gao","raw_affiliation_strings":["The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100714271","display_name":"Md. Rafiqul Islam","orcid":"https://orcid.org/0000-0002-8129-8368"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Md Rafiqul Islam","raw_affiliation_strings":["Information Systems (Data Analytics), Australian Institute of Higher Education (AIH),Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"Information Systems (Data Analytics), Australian Institute of Higher Education (AIH),Sydney,Australia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102878677","display_name":"Huan Huo","orcid":"https://orcid.org/0000-0003-2440-714X"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Huan Huo","raw_affiliation_strings":["The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051512158","display_name":"Guandong Xu","orcid":"https://orcid.org/0000-0003-4493-6663"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Guandong Xu","raw_affiliation_strings":["The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"The University of Technology Sydney,Faculty of Engineering and IT,Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5046168577"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25823205,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6717000007629395,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6717000007629395,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5952000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13717","display_name":"Advanced Algorithms and Applications","score":0.5773000121116638,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9296391010284424},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7034161686897278},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6754397749900818},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5515913963317871},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5083824992179871},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.4298551678657532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4259692132472992},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3558048605918884},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.11326989531517029},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.06495034694671631}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9296391010284424},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7034161686897278},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6754397749900818},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5515913963317871},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5083824992179871},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.4298551678657532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4259692132472992},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3558048605918884},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.11326989531517029},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.06495034694671631},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/besc64747.2024.10780726","is_oa":false,"landing_page_url":"https://doi.org/10.1109/besc64747.2024.10780726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 11th International Conference on Behavioural and Social Computing (BESC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7779936576","display_name":null,"funder_award_id":"DP22010371,LE220100078,DP200101374,LP170100891","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W175750906","https://openalex.org/W2000982976","https://openalex.org/W2023937851","https://openalex.org/W2055911634","https://openalex.org/W2057624533","https://openalex.org/W2090431713","https://openalex.org/W2111460811","https://openalex.org/W2552348884","https://openalex.org/W2766756589","https://openalex.org/W2789368914","https://openalex.org/W2889685942","https://openalex.org/W2889717020","https://openalex.org/W2909337224","https://openalex.org/W2944458161","https://openalex.org/W2970737019","https://openalex.org/W2985653130","https://openalex.org/W2997700007","https://openalex.org/W3008039831","https://openalex.org/W3048786211","https://openalex.org/W3081192838","https://openalex.org/W3127320366","https://openalex.org/W4224283293","https://openalex.org/W4285112472","https://openalex.org/W4378472825","https://openalex.org/W4386041382","https://openalex.org/W4387479675","https://openalex.org/W6633727632","https://openalex.org/W6689296595","https://openalex.org/W6729014267"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2897924318","https://openalex.org/W2138997758","https://openalex.org/W1923358586","https://openalex.org/W2184242386","https://openalex.org/W2325729322","https://openalex.org/W3126677997","https://openalex.org/W1610857240"],"abstract_inverted_index":{"The":[0],"rapid":[1],"evolution":[2],"of":[3,15,142],"affective":[4],"computing":[5],"demands":[6],"sophisticated":[7],"methodologies":[8,42],"to":[9,32,61,112,149],"enhance":[10],"the":[11,79,89,93,133,145],"reliability":[12],"and":[13,57,68,92,105,116,126],"effectiveness":[14],"speech":[16,38],"emotion":[17,39],"recognition":[18,40],"(SER).":[19],"This":[20,51],"study":[21],"integrates":[22],"harmonic-percussive":[23],"component":[24],"analysis":[25],"(HPCA)":[26],"with":[27],"variational":[28],"mode":[29,66],"decomposition":[30,60],"(VMD)":[31],"overcome":[33],"various":[34],"drawbacks":[35],"for":[36,132],"conventional":[37],"(SER)":[41],"that":[43,77],"primarily":[44],"rely":[45],"on":[46,123],"stand-alone":[47],"feature":[48,55,74,81,119],"extraction":[49,56],"techniques.":[50],"implementation":[52],"refines":[53],"acoustic":[54,101],"optimizes":[58],"VMD":[59],"prevent":[62],"information":[63],"loss":[64],"from":[65],"duplication":[67],"mixing":[69],"problems.":[70],"We":[71],"propose":[72],"a":[73,84],"map":[75],"generator":[76],"channels":[78],"enhanced":[80],"vectors":[82],"into":[83,109],"convolutional":[85],"neural":[86],"network,":[87],"specifically":[88],"VGG16":[90],"model,":[91],"model":[94,136],"is":[95],"further":[96],"enriched":[97],"by":[98],"incorporating":[99],"diverse":[100],"features":[102],"including":[103],"HP":[104],"log":[106],"Mel":[107],"spectro-grams":[108],"two-dimensional":[110],"spaces":[111],"intensify":[113],"data":[114],"augmentation":[115],"enrich":[117],"emotional":[118],"representation.":[120],"Extensive":[121],"testing":[122],"Berlin":[124],"EMO-DB":[125],"RAVDESS":[127],"databases":[128],"confirmed":[129],"positive":[130],"impacts":[131],"proposed":[134,146],"HP-VMD":[135],"performance,":[137],"achieving":[138],"robust":[139],"classification":[140],"accuracy":[141],"96.67%.":[143],"Thus,":[144],"integrated":[147],"approach":[148],"developing":[150],"SER":[151],"systems":[152],"significantly":[153],"enhances":[154],"empathetic":[155],"human":[156],"computer":[157],"interactions.":[158]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
