{"id":"https://openalex.org/W4210782598","doi":"https://doi.org/10.1109/asru51503.2021.9687938","title":"Speech Emotion Recognition Using Semi-Supervised Learning with Efficient Labeling Strategies","display_name":"Speech Emotion Recognition Using Semi-Supervised Learning with Efficient Labeling Strategies","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210782598","doi":"https://doi.org/10.1109/asru51503.2021.9687938"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9687938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687938","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089591444","display_name":"Zhi Zhu","orcid":"https://orcid.org/0000-0002-1525-9395"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhi Zhu","raw_affiliation_strings":["Fairy Devices Inc.,Japan","Fairy Devices Inc., Japan"],"affiliations":[{"raw_affiliation_string":"Fairy Devices Inc.,Japan","institution_ids":[]},{"raw_affiliation_string":"Fairy Devices Inc., Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054596832","display_name":"Yoshinao Sato","orcid":"https://orcid.org/0000-0003-0657-0269"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoshinao Sato","raw_affiliation_strings":["Fairy Devices Inc.,Japan","Fairy Devices Inc., Japan"],"affiliations":[{"raw_affiliation_string":"Fairy Devices Inc.,Japan","institution_ids":[]},{"raw_affiliation_string":"Fairy Devices Inc., Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5089591444"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0322,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.83344527,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"358","last_page":"365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.728522002696991},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.6637560129165649},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.6192383170127869},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5972934365272522},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5694818496704102},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5572128891944885},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.5006489753723145},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.4850861132144928},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4562950134277344},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4090912342071533},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38761743903160095},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36853930354118347},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.16153132915496826}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.728522002696991},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.6637560129165649},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.6192383170127869},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5972934365272522},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5694818496704102},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5572128891944885},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.5006489753723145},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.4850861132144928},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4562950134277344},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4090912342071533},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38761743903160095},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36853930354118347},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16153132915496826},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9687938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687938","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4699999988079071,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W9014458","https://openalex.org/W1983108229","https://openalex.org/W2030931454","https://openalex.org/W2142518823","https://openalex.org/W2146334809","https://openalex.org/W2149273804","https://openalex.org/W2167277498","https://openalex.org/W2404535984","https://openalex.org/W2552810951","https://openalex.org/W2746207007","https://openalex.org/W2747664154","https://openalex.org/W2761514455","https://openalex.org/W2801728680","https://openalex.org/W2801959488","https://openalex.org/W2885005742","https://openalex.org/W2936774411","https://openalex.org/W2942837495","https://openalex.org/W2944004378","https://openalex.org/W2972602947","https://openalex.org/W3002872170","https://openalex.org/W3015141382","https://openalex.org/W3026041220","https://openalex.org/W3030589078","https://openalex.org/W3035160371","https://openalex.org/W3086923691","https://openalex.org/W3097936578","https://openalex.org/W3160733670","https://openalex.org/W3197949759","https://openalex.org/W4299627282","https://openalex.org/W4394658063","https://openalex.org/W6600399172","https://openalex.org/W6680957539","https://openalex.org/W6682171051","https://openalex.org/W6742734243","https://openalex.org/W6743220854","https://openalex.org/W6778373876","https://openalex.org/W6784458597","https://openalex.org/W6785281223"],"related_works":["https://openalex.org/W4312414840","https://openalex.org/W34092691","https://openalex.org/W2794908468","https://openalex.org/W2531570999","https://openalex.org/W2943467239","https://openalex.org/W1571801203","https://openalex.org/W4206276646","https://openalex.org/W101422005","https://openalex.org/W192740413","https://openalex.org/W3004135598"],"abstract_inverted_index":{"The":[0,90],"collection":[1],"of":[2,5,22,69,104,128],"large":[3],"amounts":[4],"labeled":[6,40,70,77],"data":[7,80],"for":[8,152],"speech":[9,56,153],"emotion":[10,57,154],"recognition":[11],"requires":[12],"considerable":[13],"time":[14],"and":[15,41,67,78,126],"effort.":[16],"As":[17],"a":[18,95,147],"result,":[19],"the":[20,49,62,65,83,87,99,105,109,113,121,124,129,140],"sizes":[21],"existing":[23],"corpora":[24],"are":[25,136],"limited.":[26],"One":[27],"promising":[28],"solution":[29],"to":[30,55],"this":[31,45,73],"difficulty":[32],"is":[33],"semi-supervised":[34],"learning,":[35],"i.e.,":[36],"learning":[37,118],"from":[38],"both":[39],"unlabeled":[42,79],"data.":[43,71],"In":[44],"study,":[46],"we":[47,75],"applied":[48],"noisy":[50],"student":[51],"training":[52],"(NST)":[53],"method":[54,101],"recognition.":[58,155],"We":[59],"experimentally":[60],"investigate":[61],"trade-off":[63],"between":[64],"amount":[66,125],"reliability":[68],"For":[72],"purpose,":[74],"prepared":[76],"by":[81],"lim-iting":[82],"available":[84,130],"annotations":[85,106,122,131],"in":[86,138],"CREMA-D":[88],"dataset.":[89],"experimental":[91],"results":[92],"showed":[93],"that":[94],"model":[96],"trained":[97,115],"using":[98,116],"NST":[100],"with":[102,119],"some":[103],"achieved":[107],"almost":[108],"same":[110],"performance":[111],"as":[112],"one":[114],"supervised":[117],"all":[120],"if":[123],"reliabil-ity":[127],"were":[132],"appropriate.":[133],"Our":[134],"findings":[135],"significant":[137],"identifying":[139],"most":[141],"efficient":[142],"labeling":[143],"strategy":[144],"when":[145],"utilizing":[146],"large-scale":[148],"dataset":[149],"without":[150],"labels":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
