{"id":"https://openalex.org/W4372267458","doi":"https://doi.org/10.1109/icassp49357.2023.10097135","title":"Adapting a Self-Supervised Speech Representation for Noisy Speech Emotion Recognition by Using Contrastive Teacher-Student Learning","display_name":"Adapting a Self-Supervised Speech Representation for Noisy Speech Emotion Recognition by Using Contrastive Teacher-Student Learning","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372267458","doi":"https://doi.org/10.1109/icassp49357.2023.10097135"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097135","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097135","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024273749","display_name":"Seong-Gyun Leem","orcid":"https://orcid.org/0000-0002-1175-1577"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Seong-Gyun Leem","raw_affiliation_strings":["The University of Texas at Dallas,Department of Electrical and Computer Engineering","Department of Electrical and Computer Engineering, The University of Texas at Dallas"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Dallas,Department of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at Dallas","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026166163","display_name":"Daniel Fulford","orcid":"https://orcid.org/0000-0003-4405-9031"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Fulford","raw_affiliation_strings":["Boston University,Occupational Therapy and Psychological &#x0026; Brain Sciences"],"affiliations":[{"raw_affiliation_string":"Boston University,Occupational Therapy and Psychological &#x0026; Brain Sciences","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041695925","display_name":"Jukka\u2010Pekka Onnela","orcid":"https://orcid.org/0000-0001-6613-8668"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210101190","display_name":"Cancer Research And Biostatistics","ror":"https://ror.org/01575p865","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210101190"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jukka-Pekka Onnela","raw_affiliation_strings":["Harvard University,Harvard T.H. Chan School of Public Health,Department of Biostatistics","Department of Biostatistics, Harvard T.H. Chan School of Public Health, Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University,Harvard T.H. Chan School of Public Health,Department of Biostatistics","institution_ids":["https://openalex.org/I4210101190","https://openalex.org/I136199984"]},{"raw_affiliation_string":"Department of Biostatistics, Harvard T.H. Chan School of Public Health, Harvard University","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007724150","display_name":"David E. Gard","orcid":"https://orcid.org/0000-0002-0446-4000"},"institutions":[{"id":"https://openalex.org/I71838634","display_name":"San Francisco State University","ror":"https://ror.org/05ykr0121","country_code":"US","type":"education","lineage":["https://openalex.org/I71838634"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Gard","raw_affiliation_strings":["San Francisco State University,Psychology Department","Psychology Department, San Francisco State University"],"affiliations":[{"raw_affiliation_string":"San Francisco State University,Psychology Department","institution_ids":["https://openalex.org/I71838634"]},{"raw_affiliation_string":"Psychology Department, San Francisco State University","institution_ids":["https://openalex.org/I71838634"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040793194","display_name":"Carlos Busso","orcid":"https://orcid.org/0000-0002-4075-4072"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Busso","raw_affiliation_strings":["The University of Texas at Dallas,Department of Electrical and Computer Engineering","Department of Electrical and Computer Engineering, The University of Texas at Dallas"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Dallas,Department of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at Dallas","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024273749"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":1.834,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85846635,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8054587841033936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7602361440658569},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6623559594154358},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5791184902191162},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5634300112724304},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5094009041786194},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42868903279304504},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3609572649002075},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34651798009872437},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32794320583343506}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8054587841033936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7602361440658569},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6623559594154358},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5791184902191162},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5634300112724304},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5094009041786194},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42868903279304504},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3609572649002075},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34651798009872437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32794320583343506},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097135","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097135","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2130917731","https://openalex.org/W2525412388","https://openalex.org/W2742542661","https://openalex.org/W2747172199","https://openalex.org/W2842511635","https://openalex.org/W2962839749","https://openalex.org/W2963914279","https://openalex.org/W3015249983","https://openalex.org/W3036601975","https://openalex.org/W3086923691","https://openalex.org/W3196749180","https://openalex.org/W3197994565","https://openalex.org/W3198771897","https://openalex.org/W3205533980","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3211224152","https://openalex.org/W3212119822","https://openalex.org/W4221140371","https://openalex.org/W4224933371","https://openalex.org/W4297803592","https://openalex.org/W4297808394","https://openalex.org/W4361994820","https://openalex.org/W6631190155","https://openalex.org/W6679418476","https://openalex.org/W6780218876","https://openalex.org/W6780226713","https://openalex.org/W6784158405","https://openalex.org/W6803378298","https://openalex.org/W6809829207"],"related_works":["https://openalex.org/W2729514902","https://openalex.org/W2024160000","https://openalex.org/W2773500201","https://openalex.org/W2061273563","https://openalex.org/W4287995534","https://openalex.org/W2743258233","https://openalex.org/W2285052147","https://openalex.org/W2998168123","https://openalex.org/W1972656095","https://openalex.org/W2970216048"],"abstract_inverted_index":{"Studies":[0],"have":[1],"shown":[2],"high":[3],"performance":[4,183],"in":[5,27,128,193],"the":[6,61,66,71,93,96,101,107,111,116,120,125,129,136,141,163,169,174,182,185,190,194,200],"speech":[7,16,58,85,192],"emotion":[8],"recognition":[9],"(SER)":[10],"task":[11],"by":[12,139],"fine-tuning":[13,69],"a":[14,33,76,83,146],"self-supervised":[15,84],"representation":[17,86],"model.":[18,72,122],"Although":[19],"this":[20],"model":[21,87,114,186],"can":[22,179],"provide":[23],"emotionally":[24],"discriminative":[25,126],"embedding":[26,144],"clean":[28,108,143,164,191],"conditions,":[29],"adapting":[30],"it":[31,47],"to":[32,50,81],"noisy":[34,57,89,117,131,151,166,196],"target":[35,130,195],"environment":[36],"is":[37,48],"still":[38],"required":[39],"when":[40],"deployed":[41],"on":[42],"real-world":[43],"applications.":[44],"For":[45],"adaptation,":[46],"essential":[49],"balance":[51],"between":[52,106],"acquiring":[53],"new":[54],"knowledge":[55,63,94,127],"from":[56,110,119],"and":[59,68,115,149,165],"keeping":[60],"previous":[62],"acquired":[64],"during":[65],"pre-training":[67],"of":[70,95,168,184],"Therefore,":[73],"we":[74,99,133],"propose":[75],"contrastive":[77,175],"teacher-student":[78,176],"learning":[79,177],"framework":[80,178],"retrain":[82],"for":[88,198],"SER.":[90],"To":[91,123],"keep":[92],"original":[97,112],"model,":[98],"minimize":[100,135],"root":[102],"mean":[103],"square":[104],"error":[105],"embeddings":[109,118,152],"SER":[113],"retrained":[121],"acquire":[124],"condition,":[132],"also":[134],"InfoNCE":[137],"loss":[138],"selecting":[140],"corresponding":[142],"as":[145,157],"positive":[147],"sample":[148],"other":[150],"with":[153,162,189],"different":[154],"emotional":[155,201],"labels":[156],"negative":[158],"samples.":[159],"Our":[160],"experiment":[161],"version":[167],"MSP-Podcast":[170],"corpus":[171],"demonstrates":[172],"that":[173],"significantly":[180],"improve":[181],"only":[187],"trained":[188],"condition":[197],"all":[199],"attributes.":[202]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
