{"id":"https://openalex.org/W2980520956","doi":"https://doi.org/10.1109/taffc.2019.2947464","title":"Spontaneous Speech Emotion Recognition Using Multiscale Deep Convolutional LSTM","display_name":"Spontaneous Speech Emotion Recognition Using Multiscale Deep Convolutional LSTM","publication_year":2019,"publication_date":"2019-10-17","ids":{"openalex":"https://openalex.org/W2980520956","doi":"https://doi.org/10.1109/taffc.2019.2947464","mag":"2980520956"},"language":"en","primary_location":{"id":"doi:10.1109/taffc.2019.2947464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2019.2947464","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101891025","display_name":"Shiqing Zhang","orcid":"https://orcid.org/0000-0001-8184-5088"},"institutions":[{"id":"https://openalex.org/I82760581","display_name":"Taizhou University","ror":"https://ror.org/04fzhyx73","country_code":"CN","type":"education","lineage":["https://openalex.org/I82760581"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiqing Zhang","raw_affiliation_strings":["Institute of Intelligent Information Processing, Taizhou University, Taizhou, P. R. China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Information Processing, Taizhou University, Taizhou, P. R. China","institution_ids":["https://openalex.org/I82760581"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021523224","display_name":"Xiaoming Zhao","orcid":"https://orcid.org/0000-0002-4708-4171"},"institutions":[{"id":"https://openalex.org/I82760581","display_name":"Taizhou University","ror":"https://ror.org/04fzhyx73","country_code":"CN","type":"education","lineage":["https://openalex.org/I82760581"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Zhao","raw_affiliation_strings":["Institute of Intelligent Information Processing, Taizhou University, Taizhou, P. R. China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Information Processing, Taizhou University, Taizhou, P. R. China","institution_ids":["https://openalex.org/I82760581"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111504451","display_name":"Qi Tian","orcid":"https://orcid.org/0009-0003-2676-5300"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]},{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["CN","SE"],"is_corresponding":false,"raw_author_name":"Qi Tian","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab, Shenzhen, P. R. China","Huawei Noah&#x0027"],"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab, Shenzhen, P. R. China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Noah&#x0027","institution_ids":["https://openalex.org/I4210159102"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101891025"],"corresponding_institution_ids":["https://openalex.org/I82760581"],"apc_list":null,"apc_paid":null,"fwci":7.4847,"has_fulltext":false,"cited_by_count":150,"citation_normalized_percentile":{"value":0.97645108,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"13","issue":"2","first_page":"680","last_page":"688"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7561953067779541},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7512366771697998},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7323288321495056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6399492025375366},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6361325979232788},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6081568598747253},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5816853046417236},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5618945360183716},{"id":"https://openalex.org/keywords/affective-computing","display_name":"Affective computing","score":0.5312169790267944},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5217512845993042}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7561953067779541},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7512366771697998},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7323288321495056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6399492025375366},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6361325979232788},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6081568598747253},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5816853046417236},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5618945360183716},{"id":"https://openalex.org/C6438553","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Affective computing","level":2,"score":0.5312169790267944},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5217512845993042}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taffc.2019.2947464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2019.2947464","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.44999998807907104,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2745919159","display_name":null,"funder_award_id":"LZ20F020002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5851559761","display_name":null,"funder_award_id":"61976149","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8745541972","display_name":null,"funder_award_id":"1803gy08","funder_id":"https://openalex.org/F4320336614","funder_display_name":"Science and Technology Plan Project of Taizhou"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336614","display_name":"Science and Technology Plan Project of Taizhou","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1581984155","https://openalex.org/W1686810756","https://openalex.org/W1930528368","https://openalex.org/W1963882359","https://openalex.org/W1976066595","https://openalex.org/W2003837801","https://openalex.org/W2032254851","https://openalex.org/W2042700175","https://openalex.org/W2063767522","https://openalex.org/W2064675550","https://openalex.org/W2074788634","https://openalex.org/W2080289724","https://openalex.org/W2080576537","https://openalex.org/W2087195460","https://openalex.org/W2087618018","https://openalex.org/W2099767163","https://openalex.org/W2100495367","https://openalex.org/W2117287331","https://openalex.org/W2137639365","https://openalex.org/W2144005487","https://openalex.org/W2161073241","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2223246223","https://openalex.org/W2239141610","https://openalex.org/W2243226955","https://openalex.org/W2274202079","https://openalex.org/W2277498883","https://openalex.org/W2295001676","https://openalex.org/W2295579880","https://openalex.org/W2336160298","https://openalex.org/W2399733683","https://openalex.org/W2490270993","https://openalex.org/W2552348884","https://openalex.org/W2576454451","https://openalex.org/W2585658440","https://openalex.org/W2610961739","https://openalex.org/W2614874155","https://openalex.org/W2703895418","https://openalex.org/W2750666523","https://openalex.org/W2766272105","https://openalex.org/W2889466822","https://openalex.org/W2919115771","https://openalex.org/W2964167669","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6731741027"],"related_works":["https://openalex.org/W1923358586","https://openalex.org/W2184242386","https://openalex.org/W2325729322","https://openalex.org/W3080495370","https://openalex.org/W4285597148","https://openalex.org/W2901531394","https://openalex.org/W1559262936","https://openalex.org/W4321599321","https://openalex.org/W2134707158","https://openalex.org/W2767348466"],"abstract_inverted_index":{"Recently,":[0],"emotion":[1,47,64,122,126],"recognition":[2,127],"in":[3,8,15,22,117],"real":[4,23],"sceneries":[5,24],"such":[6],"as":[7],"the":[9,37,82,85,100,103,110,158,164,168],"wild":[10],"has":[11],"attracted":[12],"extensive":[13],"attention":[14],"affective":[16],"computing,":[17],"because":[18],"existing":[19],"spontaneous":[20,62,154],"emotions":[21],"are":[25,141],"more":[26],"challenging":[27,153],"and":[28,160],"difficult":[29],"to":[30,76,108],"identify":[31],"than":[32],"other":[33],"emotions.":[34],"Motivated":[35],"by":[36,130,143],"diverse":[38],"effects":[39],"of":[40,43,84,90,102,138,167],"different":[41,125],"lengths":[42,137],"audio":[44],"spectrograms":[45],"on":[46,81,99,151],"identification,":[48],"this":[49],"paper":[50],"proposes":[51],"a":[52,67,93,145],"multiscale":[53],"deep":[54,68,78,94],"convolutional":[55,69],"long":[56],"short-term":[57],"memory":[58],"(LSTM)":[59],"framework":[60],"for":[61,120],"speech":[63],"recognition.":[65,123],"Initially,":[66],"neural":[70],"network":[71],"(CNN)":[72],"model":[73,96],"is":[74,97],"used":[75],"learn":[77],"segment-level":[79,105,139],"features":[80,107],"basis":[83,101],"created":[86],"image-like":[87],"three":[88],"channels":[89],"spectrograms.":[91],"Then,":[92],"LSTM":[95,134],"adopted":[98],"learned":[104],"CNN":[106,132],"capture":[109],"temporal":[111],"dependency":[112],"among":[113],"all":[114],"divided":[115],"segments":[116],"an":[118],"utterance":[119],"utterance-level":[121],"Finally,":[124],"results,":[128],"obtained":[129],"combining":[131],"with":[133],"at":[135],"multiple":[136],"spectrograms,":[140],"integrated":[142],"using":[144],"score-level":[146],"fusion":[147],"strategy.":[148],"Experimental":[149],"results":[150],"two":[152],"emotional":[155],"datasets,":[156],"i.e.,":[157],"AFEW5.0":[159],"BAUM-1s":[161],"databases,":[162],"demonstrate":[163],"promising":[165],"performance":[166],"proposed":[169],"method,":[170],"outperforming":[171],"state-of-the-art":[172],"methods.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":40},{"year":2024,"cited_by_count":33},{"year":2023,"cited_by_count":35},{"year":2022,"cited_by_count":22},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
