{"id":"https://openalex.org/W4399376552","doi":"https://doi.org/10.1109/tmm.2024.3410133","title":"Improving Pre-Trained Model-Based Speech Emotion Recognition From a Low-Level Speech Feature Perspective","display_name":"Improving Pre-Trained Model-Based Speech Emotion Recognition From a Low-Level Speech Feature Perspective","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4399376552","doi":"https://doi.org/10.1109/tmm.2024.3410133"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3410133","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3410133","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ke Liu","orcid":"https://orcid.org/0000-0002-5765-2128"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ke Liu","raw_affiliation_strings":["Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060342584","display_name":"Jiwei Wei","orcid":"https://orcid.org/0000-0003-3912-1742"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiwei Wei","raw_affiliation_strings":["Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jie Zou","orcid":"https://orcid.org/0000-0002-0181-778X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Zou","raw_affiliation_strings":["Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peng Wang","orcid":"https://orcid.org/0000-0002-5397-9115"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Wang","raw_affiliation_strings":["Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100397616","display_name":"Yang Yang","orcid":"https://orcid.org/0000-0002-5070-4511"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yang","raw_affiliation_strings":["Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media and School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052993469","display_name":"Heng Tao Shen","orcid":"https://orcid.org/0000-0002-2999-2088"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Tao Shen","raw_affiliation_strings":["Center for Future Multimedia and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Multimedia and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":2.732,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91198165,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"10623","last_page":"10636"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8389999866485596,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8389999866485596,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8015999794006348,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.7824000120162964,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8418183326721191},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7819660902023315},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.6048853993415833},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5724050998687744},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5620220303535461},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.46691539883613586},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.42721864581108093},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4159839153289795},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36106330156326294},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3495962619781494}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8418183326721191},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7819660902023315},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.6048853993415833},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5724050998687744},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5620220303535461},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.46691539883613586},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.42721864581108093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4159839153289795},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36106330156326294},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3495962619781494},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3410133","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3410133","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1294623083","display_name":null,"funder_award_id":"62220106008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2464018058","display_name":null,"funder_award_id":"U20B2063","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G591136627","display_name":null,"funder_award_id":"62306067","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6386744776","display_name":null,"funder_award_id":"2022M720660","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W175750906","https://openalex.org/W1494198834","https://openalex.org/W2016896048","https://openalex.org/W2082107603","https://openalex.org/W2144005487","https://openalex.org/W2146334809","https://openalex.org/W2191779130","https://openalex.org/W2194775991","https://openalex.org/W2239141610","https://openalex.org/W2566079294","https://openalex.org/W2570915410","https://openalex.org/W2590019597","https://openalex.org/W2592702372","https://openalex.org/W2739418057","https://openalex.org/W2739444023","https://openalex.org/W2752782242","https://openalex.org/W2889325879","https://openalex.org/W2889374687","https://openalex.org/W2891488835","https://openalex.org/W2959133507","https://openalex.org/W2972811324","https://openalex.org/W2973181312","https://openalex.org/W2974743569","https://openalex.org/W2981076146","https://openalex.org/W2982461576","https://openalex.org/W2998674125","https://openalex.org/W3012461129","https://openalex.org/W3015308237","https://openalex.org/W3015489952","https://openalex.org/W3034552520","https://openalex.org/W3035212740","https://openalex.org/W3081192838","https://openalex.org/W3095789936","https://openalex.org/W3160039712","https://openalex.org/W3162475537","https://openalex.org/W3169472988","https://openalex.org/W3183430956","https://openalex.org/W3195259391","https://openalex.org/W3197642003","https://openalex.org/W3206597437","https://openalex.org/W3208480086","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3215440557","https://openalex.org/W4205383840","https://openalex.org/W4221162872","https://openalex.org/W4226419249","https://openalex.org/W4283800180","https://openalex.org/W4288038809","https://openalex.org/W4293519468","https://openalex.org/W4296068595","https://openalex.org/W4312402957","https://openalex.org/W4313051036","https://openalex.org/W4320713010","https://openalex.org/W4361994820","https://openalex.org/W4372263296","https://openalex.org/W4375869379","https://openalex.org/W4378364836","https://openalex.org/W4379033883","https://openalex.org/W4385255252","https://openalex.org/W4385329631","https://openalex.org/W4390874621","https://openalex.org/W4391853880","https://openalex.org/W4392172995","https://openalex.org/W4394642372","https://openalex.org/W6712588427","https://openalex.org/W6757817989","https://openalex.org/W6760803314","https://openalex.org/W6764726619","https://openalex.org/W6780218876","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W3126677997","https://openalex.org/W1610857240","https://openalex.org/W2981428355","https://openalex.org/W1834994814","https://openalex.org/W2041273198","https://openalex.org/W1599055764","https://openalex.org/W2131711534","https://openalex.org/W2149163000","https://openalex.org/W2962858469","https://openalex.org/W2289873871"],"abstract_inverted_index":{"Multi-view":[0],"speech":[1,76,96],"emotion":[2,144],"recognition":[3],"(SER)":[4],"based":[5,65],"on":[6,36,66,88,179,186,193,201,207],"the":[7,14,24,31,50,67,71,82,94,127,131,165,171,180,187,194,202,208],"pre-trained":[8,68,83],"model":[9,25,69,84],"has":[10],"gained":[11],"attention":[12],"in":[13,22,27,93],"last":[15],"two":[16],"years,":[17],"which":[18],"shows":[19],"great":[20],"potential":[21],"improving":[23],"performance":[26,56],"speaker-independent":[28],"scenarios.":[29],"However,":[30],"existing":[32,217],"work":[33],"either":[34],"relies":[35],"various":[37],"fine-tuning":[38,81],"methods":[39,218],"or":[40],"uses":[41],"excessive":[42],"feature":[43,97,156],"views":[44],"with":[45,54],"complex":[46],"fusion":[47,158],"strategies,":[48],"causing":[49],"increase":[51],"of":[52,73,130,143,175],"complexity":[53],"limited":[55],"benefit.":[57],"In":[58,164],"this":[59],"paper,":[60],"we":[61,79,150],"improve":[62],"multi-view":[63],"SER":[64],"from":[70,134],"perspective":[72],"a":[74,105,152,221],"low-level":[75,95],"feature.":[77],"Specifically,":[78],"forgo":[80],"and":[85,141,173,177,184,191,197,199,219],"instead":[86],"focus":[87],"learning":[89,142],"effective":[90,155],"features":[91,132],"hidden":[92],"mel-scale":[98],"frequency":[99],"cepstral":[100],"coefficient":[101],"(MFCC).":[102],"We":[103],"propose":[104],"<bold":[106,111,114,117],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[107,109,112,115,118,121],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">t</b>wo-<bold":[108],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">s</b>tream":[110],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">p</b>ooling":[113],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">c</b>hannel":[116],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">a</b>ttention":[119],"(<bold":[120],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">TsPCA</b>)":[122],"module":[123,137],"to":[124,160],"discriminatively":[125],"weight":[126],"channel":[128],"dimensions":[129],"derived":[133],"MFCC.":[135],"This":[136],"enables":[138],"inter-channel":[139],"interaction":[140],"sequence":[145],"information":[146],"across":[147],"channels.":[148],"Furthermore,":[149],"design":[151],"simple":[153],"but":[154],"view":[157],"strategy":[159],"learn":[161],"robust":[162],"representations.":[163],"comparison":[166],"experiments,":[167],"our":[168,213],"method":[169,214],"achieves":[170,220],"WA":[172],"UA":[174],"73.97%/74.69%":[176],"74.61%/75.66%":[178],"IEMOCAP":[181],"dataset,":[182,189,196],"97.21%":[183],"97.11%":[185],"Emo-DB":[188],"77.08%":[190],"77.34%":[192],"RAVDESS":[195],"74.38%":[198],"71.43%":[200],"SAVEE":[203],"dataset.":[204],"Extensive":[205],"experiments":[206],"four":[209],"datasets":[210],"demonstrate":[211],"that":[212],"consistently":[215],"surpasses":[216],"new":[222],"State-of-the-Art":[223],"result.":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
