{"id":"https://openalex.org/W4362650848","doi":"https://doi.org/10.1109/aciiw57231.2022.10086021","title":"Multiple attention convolutional-recurrent neural networks for speech emotion recognition","display_name":"Multiple attention convolutional-recurrent neural networks for speech emotion recognition","publication_year":2022,"publication_date":"2022-10-18","ids":{"openalex":"https://openalex.org/W4362650848","doi":"https://doi.org/10.1109/aciiw57231.2022.10086021"},"language":"en","primary_location":{"id":"doi:10.1109/aciiw57231.2022.10086021","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aciiw57231.2022.10086021","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100361544","display_name":"Zhihao Zhang","orcid":"https://orcid.org/0009-0002-9052-3309"},"institutions":[{"id":"https://openalex.org/I118987531","display_name":"Anhui Jianzhu University","ror":"https://ror.org/0108wjw08","country_code":"CN","type":"education","lineage":["https://openalex.org/I118987531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhihao Zhang","raw_affiliation_strings":["School of electronic and information engineering, Anhui Jianzhu University,Anhui International Joint Research Center for Ancient Architecture Intellisencing and Multi-Dimensional Modeling,HeFei,China","Anhui International Joint Research Center for Ancient Architecture Intellisencing and Multi-Dimensional Modeling, School of electronic and information engineering, Anhui Jianzhu University, HeFei, China"],"affiliations":[{"raw_affiliation_string":"School of electronic and information engineering, Anhui Jianzhu University,Anhui International Joint Research Center for Ancient Architecture Intellisencing and Multi-Dimensional Modeling,HeFei,China","institution_ids":["https://openalex.org/I118987531"]},{"raw_affiliation_string":"Anhui International Joint Research Center for Ancient Architecture Intellisencing and Multi-Dimensional Modeling, School of electronic and information engineering, Anhui Jianzhu University, HeFei, China","institution_ids":["https://openalex.org/I118987531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079571569","display_name":"Kunxia Wang","orcid":"https://orcid.org/0000-0003-1471-5886"},"institutions":[{"id":"https://openalex.org/I118987531","display_name":"Anhui Jianzhu University","ror":"https://ror.org/0108wjw08","country_code":"CN","type":"education","lineage":["https://openalex.org/I118987531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kunxia Wang","raw_affiliation_strings":["Higher Education Institutes, School of electronic and information engineering, Anhui Jianzhu University,Key Laboratory of Architectural Acoustic Environment of Anhui,HeFei,China","Key Laboratory of Architectural Acoustic Environment of Anhui, Higher Education Institutes, School of electronic and information engineering, Anhui Jianzhu University, HeFei, China"],"affiliations":[{"raw_affiliation_string":"Higher Education Institutes, School of electronic and information engineering, Anhui Jianzhu University,Key Laboratory of Architectural Acoustic Environment of Anhui,HeFei,China","institution_ids":["https://openalex.org/I118987531"]},{"raw_affiliation_string":"Key Laboratory of Architectural Acoustic Environment of Anhui, Higher Education Institutes, School of electronic and information engineering, Anhui Jianzhu University, HeFei, China","institution_ids":["https://openalex.org/I118987531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100361544"],"corresponding_institution_ids":["https://openalex.org/I118987531"],"apc_list":null,"apc_paid":null,"fwci":0.3699,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65985587,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8002458810806274},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6890993118286133},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6870518326759338},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5534670948982239},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5518975853919983},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5376452207565308},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5142697691917419},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.49566352367401123},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.48994725942611694},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.47250616550445557},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.467191219329834},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4668278098106384},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4287196397781372},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.36840349435806274}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8002458810806274},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6890993118286133},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6870518326759338},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5534670948982239},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5518975853919983},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5376452207565308},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5142697691917419},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.49566352367401123},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.48994725942611694},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.47250616550445557},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.467191219329834},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4668278098106384},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4287196397781372},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.36840349435806274},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aciiw57231.2022.10086021","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aciiw57231.2022.10086021","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W175750906","https://openalex.org/W1849277567","https://openalex.org/W2023937851","https://openalex.org/W2087618018","https://openalex.org/W2146334809","https://openalex.org/W2399733683","https://openalex.org/W2578895956","https://openalex.org/W2625297138","https://openalex.org/W2703895418","https://openalex.org/W2790854021","https://openalex.org/W2885005742","https://openalex.org/W2889325879","https://openalex.org/W2889374687","https://openalex.org/W2922509574","https://openalex.org/W2939129695","https://openalex.org/W2940259008","https://openalex.org/W2963420686","https://openalex.org/W2997399314","https://openalex.org/W3000894155","https://openalex.org/W3006705189","https://openalex.org/W3035502324","https://openalex.org/W3097488938","https://openalex.org/W3118826611","https://openalex.org/W3126625480","https://openalex.org/W3132302778","https://openalex.org/W3138410940","https://openalex.org/W3139270985","https://openalex.org/W3200034843","https://openalex.org/W3205428167","https://openalex.org/W3209572141","https://openalex.org/W3213648457","https://openalex.org/W4220725970","https://openalex.org/W4224234075","https://openalex.org/W4233923317","https://openalex.org/W4289752563"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W3179495260","https://openalex.org/W1976719989","https://openalex.org/W3127543252","https://openalex.org/W2065606036","https://openalex.org/W2942893872","https://openalex.org/W2016904525"],"abstract_inverted_index":{"Speech":[0],"Emotion":[1],"Recognition":[2],"is":[3,52,215],"of":[4,11,18,39,67,189],"great":[5],"significance":[6],"in":[7,26,60,82,133],"the":[8,19,48,57,61,68,86,125,129,148,154,182,185,196],"research":[9],"field":[10],"human-computer":[12],"interaction":[13],"and":[14,105,116,141,158,192,210],"affective":[15],"computing.":[16],"One":[17],"major":[20],"challenges":[21],"for":[22],"SER":[23],"now":[24],"lies":[25],"how":[27],"to":[28,54,74,84,123,147,151,167],"explore":[29],"effective":[30,161],"emotional":[31,58,77,126,156],"features":[32,119,172],"from":[33],"lengthy":[34],"utterances.":[35],"However,":[36],"since":[37],"most":[38,218],"existing":[40],"deep-learning":[41],"based":[42,96],"SERs":[43],"adopt":[44],"Log-Mel":[45],"spectrograms":[46],"as":[47,137],"input":[49],"model,":[50],"it":[51,72],"unable":[53],"fully":[55],"convey":[56],"information":[59],"speech.":[62],"Furthermore,":[63],"limited":[64],"extraction":[65],"ability":[66],"model":[69,135,183],"may":[70],"make":[71],"difficult":[73],"extract":[75],"key":[76,155],"representations.":[78],"As":[79],"a":[80,91],"result,":[81],"order":[83],"address":[85],"above":[87],"issues,":[88],"we":[89],"propose":[90],"new":[92],"convolutional":[93,101],"recurrent":[94],"network":[95,103,110],"on":[97,153,207],"multiple":[98,130],"attention,":[99],"including":[100],"neural":[102],"(CNN)":[104],"bidirectional":[106],"long":[107],"short-term":[108],"memory":[109],"(BiLSTM)":[111],"modules,":[112],"using":[113],"extracted":[114],"Mel-spectrums":[115],"Fourier":[117],"Coefficient":[118],"respectively,":[120],"which":[121,214],"helps":[122],"complement":[124],"information.":[127,177],"Further,":[128],"attention":[131,140,143,164],"mechanisms":[132,144],"our":[134],"are":[136,145],"follows:":[138],"Spatial":[139],"channel":[142],"added":[146],"CNN":[149],"module":[150],"focus":[152],"area":[157],"locate":[159],"more":[160],"features.":[162],"Temporal":[163],"gives":[165],"weights":[166],"different":[168],"time":[169],"series":[170],"segment":[171],"after":[173],"BiLSTM":[174],"extracts":[175],"sequence":[176],"Experimental":[178],"results":[179],"show":[180],"that":[181],"achieves":[184],"WA":[186],"(weighted":[187],"accuracy)":[188,199],"87.9%,":[190],"76.5%,":[191],"75.2%":[193],"respectively":[194,206],"while":[195],"UA":[197],"(unweighted":[198],"stands":[200],"at":[201],"87.6%,":[202],"73.5%,":[203],"70.1":[204],"%":[205],"EMODB,":[208],"IEMOCAP,":[209],"EESDB":[211],"speech":[212],"datasets,":[213],"better":[216],"than":[217],"state-of-the-art":[219],"methods.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
