{"id":"https://openalex.org/W4412985033","doi":"https://doi.org/10.1109/tce.2025.3596056","title":"MSMF-MIL: Multi-Scale Mixed Feature-Based Multiple Instance Learning for Speech Emotion Recognition","display_name":"MSMF-MIL: Multi-Scale Mixed Feature-Based Multiple Instance Learning for Speech Emotion Recognition","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4412985033","doi":"https://doi.org/10.1109/tce.2025.3596056"},"language":"en","primary_location":{"id":"doi:10.1109/tce.2025.3596056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2025.3596056","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100358079","display_name":"Dongdong Li","orcid":"https://orcid.org/0000-0002-1880-8054"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dongdong Li","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-1880-8054","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101616858","display_name":"Hongmei Yang","orcid":"https://orcid.org/0000-0002-6535-2862"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongmei Yang","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035486030","display_name":"Zihan Song","orcid":"https://orcid.org/0000-0003-2778-2543"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihan Song","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100621319","display_name":"Zhe Wang","orcid":"https://orcid.org/0000-0002-3759-2041"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-3759-2041","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100358079"],"corresponding_institution_ids":["https://openalex.org/I143593769"],"apc_list":null,"apc_paid":null,"fwci":1.1457,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.79630944,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"71","issue":"3","first_page":"7539","last_page":"7550"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9660000205039978,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9595999717712402,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6821117401123047},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6134403347969055},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.50075364112854},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.49160251021385193},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.48334571719169617},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4668572247028351},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.440940797328949},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4239305257797241}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6821117401123047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6134403347969055},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.50075364112854},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.49160251021385193},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.48334571719169617},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4668572247028351},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.440940797328949},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4239305257797241},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tce.2025.3596056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2025.3596056","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8628124579","display_name":null,"funder_award_id":"62276098","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8827074322","display_name":null,"funder_award_id":"ZS2024001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W175750906","https://openalex.org/W2080289724","https://openalex.org/W2093174546","https://openalex.org/W2098239572","https://openalex.org/W2110119381","https://openalex.org/W2146334809","https://openalex.org/W2170638740","https://openalex.org/W2972452068","https://openalex.org/W2972463723","https://openalex.org/W2972971501","https://openalex.org/W2997512706","https://openalex.org/W3092468857","https://openalex.org/W3097827011","https://openalex.org/W3132302778","https://openalex.org/W3159124063","https://openalex.org/W3162950882","https://openalex.org/W3210971248","https://openalex.org/W4205567678","https://openalex.org/W4214839894","https://openalex.org/W4232917604","https://openalex.org/W4296886713","https://openalex.org/W4307569623","https://openalex.org/W4312729513","https://openalex.org/W4316661260","https://openalex.org/W4318831690","https://openalex.org/W4319840113","https://openalex.org/W4321510659","https://openalex.org/W4352977805","https://openalex.org/W4372269797","https://openalex.org/W4379033883","https://openalex.org/W4381786694","https://openalex.org/W4386041382","https://openalex.org/W4386195880","https://openalex.org/W4387530839","https://openalex.org/W4387682244","https://openalex.org/W4387789680","https://openalex.org/W4388510410","https://openalex.org/W4388900360","https://openalex.org/W4391407010","https://openalex.org/W4391883442","https://openalex.org/W4392266896","https://openalex.org/W4392860069","https://openalex.org/W4392903804","https://openalex.org/W4399374391","https://openalex.org/W4399617776","https://openalex.org/W4399894740","https://openalex.org/W4402334360","https://openalex.org/W4404317055","https://openalex.org/W4404533470","https://openalex.org/W4408441416"],"related_works":["https://openalex.org/W3147584709","https://openalex.org/W2977677679","https://openalex.org/W1992327129","https://openalex.org/W2381986121","https://openalex.org/W2370918718","https://openalex.org/W2256933480","https://openalex.org/W2027854990","https://openalex.org/W3126677997","https://openalex.org/W1610857240","https://openalex.org/W4407198734"],"abstract_inverted_index":{"Speech":[0,123],"emotion":[1,130,180],"in":[2],"natural":[3],"utterances":[4],"is":[5,43],"inherently":[6],"complex":[7],"and":[8,80,97,125,141,150,156,176],"non-uniform,":[9],"with":[10,51],"key":[11,65],"emotional":[12],"cues":[13],"often":[14],"confined":[15],"to":[16,63,95],"brief":[17],"segments.":[18],"To":[19],"address":[20],"this":[21],"challenge,":[22],"we":[23],"propose":[24],"a":[25,46,68,83],"novel":[26],"multi-scale":[27,90,168],"mixed":[28],"feature-based":[29],"framework":[30,71],"that":[31,103,165],"leverages":[32],"Multiple":[33],"Instance":[34],"Learning":[35],"(MIL).":[36],"In":[37],"our":[38,70,134,166],"approach,":[39],"each":[40,52],"speech":[41,101,129,179],"utterance":[42],"transformed":[44],"into":[45],"\u201cbag\u201d":[47],"containing":[48],"multiple":[49],"segments,":[50],"segment":[53],"treated":[54],"as":[55],"an":[56],"individual":[57],"instance.":[58],"Inspired":[59],"by":[60],"MIL\u2019s":[61],"capability":[62],"identify":[64],"instances":[66],"within":[67],"set,":[69],"employs":[72],"CNN-based":[73],"MIL":[74],"models":[75],"at":[76],"both":[77],"the":[78,99,110,118,126,174],"frame":[79],"utterance-levels,":[81],"while":[82],"ResNet-based":[84],"model":[85],"extracts":[86],"segment-level":[87],"features.":[88],"These":[89,162],"representations":[91],"are":[92],"then":[93],"fused":[94],"isolate":[96],"emphasize":[98],"critical":[100],"segments":[102],"express":[104],"dominant":[105],"emotions.":[106],"Experimental":[107],"evaluations":[108],"on":[109,145,154,160],"Interactive":[111],"Emotional":[112,122],"Dyadic":[113],"Motion":[114],"Capture":[115],"Database":[116,120],"(IEMOCAP),":[117],"Berlin":[119],"of":[121,178],"(Emo-DB)":[124],"spontaneous":[127],"URDU-language":[128],"database":[131],"(URDU)":[132],"demonstrate":[133],"approach\u2019s":[135],"effectiveness,":[136],"achieving":[137],"67.76%":[138],"weighted":[139,148],"accuracy":[140,144,149,153,159,177],"58.39%":[142],"unweighted":[143,152,158],"IEMOCAP,":[146],"93.80%":[147],"91.92%":[151],"Emo-DB,":[155],"95.75%":[157],"URDU.":[161],"results":[163],"confirm":[164],"MIL-based,":[167],"feature":[169],"fusion":[170],"strategy":[171],"significantly":[172],"enhances":[173],"robustness":[175],"recognition":[181],"systems.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-22T09:01:20.584952","created_date":"2025-10-10T00:00:00"}
