{"id":"https://openalex.org/W4380987202","doi":"https://doi.org/10.1109/access.2023.3287093","title":"On the Effect of Log-Mel Spectrogram Parameter Tuning for Deep Learning-Based Speech Emotion Recognition","display_name":"On the Effect of Log-Mel Spectrogram Parameter Tuning for Deep Learning-Based Speech Emotion Recognition","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4380987202","doi":"https://doi.org/10.1109/access.2023.3287093"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3287093","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287093","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10154046.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10154046.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092188094","display_name":"Azamat Mukhamediya","orcid":"https://orcid.org/0000-0001-5190-6847"},"institutions":[{"id":"https://openalex.org/I60559429","display_name":"Nazarbayev University","ror":"https://ror.org/052bx8q98","country_code":"KZ","type":"education","lineage":["https://openalex.org/I60559429"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Azamat Mukhamediya","raw_affiliation_strings":["Department of Electrical and Computer Engineering, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0001-5190-6847","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan","institution_ids":["https://openalex.org/I60559429"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057434469","display_name":"Siamac Fazli","orcid":"https://orcid.org/0000-0003-3397-0647"},"institutions":[{"id":"https://openalex.org/I60559429","display_name":"Nazarbayev University","ror":"https://ror.org/052bx8q98","country_code":"KZ","type":"education","lineage":["https://openalex.org/I60559429"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Siamac Fazli","raw_affiliation_strings":["Department of Computer Science, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0003-3397-0647","affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan","institution_ids":["https://openalex.org/I60559429"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040086890","display_name":"Amin Zollanvari","orcid":"https://orcid.org/0000-0002-9172-8413"},"institutions":[{"id":"https://openalex.org/I60559429","display_name":"Nazarbayev University","ror":"https://ror.org/052bx8q98","country_code":"KZ","type":"education","lineage":["https://openalex.org/I60559429"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Amin Zollanvari","raw_affiliation_strings":["Department of Electrical and Computer Engineering, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0002-9172-8413","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, School of Engineering and Digital Sciences, Nazarbayev University, Astana, Kazakhstan","institution_ids":["https://openalex.org/I60559429"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.5747,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.90516281,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"11","issue":null,"first_page":"61950","last_page":"61957"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8311474323272705},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8000762462615967},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.6626887917518616},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6424019932746887},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6167439818382263},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5834425687789917},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5472530722618103},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4620015323162079},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.45906856656074524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43972596526145935},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.42724478244781494},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4167991876602173},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3406689763069153}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8311474323272705},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8000762462615967},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.6626887917518616},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6424019932746887},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6167439818382263},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5834425687789917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5472530722618103},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4620015323162079},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45906856656074524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43972596526145935},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.42724478244781494},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4167991876602173},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3406689763069153}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3287093","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287093","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10154046.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:81b2dac27f4a4fbabd84349a52f5f00d","is_oa":true,"landing_page_url":"https://doaj.org/article/81b2dac27f4a4fbabd84349a52f5f00d","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 61950-61957 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3287093","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287093","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10154046.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G4853279189","display_name":null,"funder_award_id":"20122022FD4120","funder_id":"https://openalex.org/F4320327695","funder_display_name":"Nazarbayev University"},{"id":"https://openalex.org/G680224447","display_name":null,"funder_award_id":"021220FD1151","funder_id":"https://openalex.org/F4320327695","funder_display_name":"Nazarbayev University"}],"funders":[{"id":"https://openalex.org/F4320327695","display_name":"Nazarbayev University","ror":"https://ror.org/052bx8q98"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380987202.pdf","grobid_xml":"https://content.openalex.org/works/W4380987202.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W175750906","https://openalex.org/W1578856370","https://openalex.org/W1686810756","https://openalex.org/W2097117768","https://openalex.org/W2146334809","https://openalex.org/W2163605009","https://openalex.org/W2166725142","https://openalex.org/W2193413348","https://openalex.org/W2194775991","https://openalex.org/W2485020921","https://openalex.org/W2492473494","https://openalex.org/W2777186991","https://openalex.org/W2799331981","https://openalex.org/W2884739346","https://openalex.org/W2885005742","https://openalex.org/W2889191349","https://openalex.org/W2959133507","https://openalex.org/W2960833983","https://openalex.org/W2970737019","https://openalex.org/W2972498864","https://openalex.org/W3014377038","https://openalex.org/W3015240477","https://openalex.org/W3020839184","https://openalex.org/W3040201034","https://openalex.org/W3112103486","https://openalex.org/W3132191748","https://openalex.org/W3134751001","https://openalex.org/W3136524425","https://openalex.org/W3159778524","https://openalex.org/W3164680725","https://openalex.org/W3186192207","https://openalex.org/W3192216513","https://openalex.org/W3212554192","https://openalex.org/W4211116959","https://openalex.org/W4226153164","https://openalex.org/W4285106979","https://openalex.org/W4288765755","https://openalex.org/W4385245566","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6687566353","https://openalex.org/W6739901393","https://openalex.org/W6795062860"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2088854863","https://openalex.org/W2011227383","https://openalex.org/W2065606036","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W2016904525"],"abstract_inverted_index":{"Speech":[0],"emotion":[1,37],"recognition":[2],"(SER)":[3],"has":[4],"become":[5],"a":[6,19,24,147],"major":[7,127],"area":[8],"of":[9,45,72,99,105,176,183,220,223],"investigation":[10],"in":[11,113,142,215,226],"human-computer":[12],"interaction.":[13],"Conventionally,":[14],"SER":[15,114,170],"is":[16,52,58,86,89,93,111],"formulated":[17],"as":[18,203],"classification":[20,193],"problem":[21],"that":[22,85,90,116],"follows":[23],"common":[25,140],"methodology:":[26],"(i)":[27,174],"extracting":[28],"features":[29,77],"from":[30,78],"speech":[31],"signals;":[32],"and":[33,83,125,131,154,185,205,212,229],"(ii)":[34,186],"constructing":[35],"an":[36],"classifier":[38],"using":[39,68,166],"extracted":[40],"features.":[41],"With":[42],"the":[43,49,55,97,103,106,181,218],"advent":[44],"deep":[46,61],"learning,":[47],"however,":[48],"former":[50],"stage":[51],"integrated":[53],"into":[54,159],"latter.":[56],"That":[57],"to":[59,102,129,209],"say,":[60],"neural":[62],"networks":[63],"(DNNs),":[64],"which":[65],"are":[66],"trained":[67,107],"log-Mel":[69],"spectrograms":[70],"(LMS)":[71],"audio":[73],"waveforms,":[74],"extract":[75],"discriminative":[76],"LMS.":[79],"A":[80],"critical":[81],"issue,":[82],"one":[84],"often":[87],"overlooked,":[88],"this":[91,139,143],"procedure":[92],"done":[94],"without":[95],"relating":[96],"choice":[98],"LMS":[100,161,177,189,201,224],"parameters":[101,124,175,202,225],"performance":[104,182,194],"DNN":[108,134],"classifiers.":[109],"It":[110],"commonplace":[112],"studies":[115],"practitioners":[117],"assume":[118],"some":[119],"\u201cusual\u201d":[120,221],"values":[121,222],"for":[122],"these":[123],"devote":[126],"efforts":[128],"training":[130],"comparing":[132],"various":[133],"architectures.":[135],"In":[136,198],"contrast":[137,216],"with":[138,217],"approach,":[141],"work":[144],"we":[145],"choose":[146],"single":[148],"lightweight":[149],"pre-trained":[150],"architecture,":[151],"namely,":[152],"SqueezeNet,":[153],"shift":[155],"our":[156],"main":[157],"effort":[158],"tuning":[160,188,206],"parameters.":[162],"Our":[163],"empirical":[164],"results":[165],"three":[167],"publicly":[168],"available":[169],"datasets":[171],"show":[172],"that:":[173],"can":[178,195],"considerably":[179],"affect":[180],"DNNs;":[184],"by":[187],"parameters,":[190],"highly":[191],"competitive":[192],"be":[196],"achieved.":[197],"particular,":[199],"treating":[200],"hyperparameters":[204],"them":[207],"led":[208],"~23%,":[210],"~10%,":[211],"~11%":[213],"improvement":[214],"use":[219],"EmoDB,":[227],"IEMOCAP,":[228],"SAVEE":[230],"datasets,":[231],"respectively.":[232]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-16T09:24:06.705377","created_date":"2025-10-10T00:00:00"}
