{"id":"https://openalex.org/W4408832887","doi":"https://doi.org/10.3390/fi17040143","title":"EmoSDS: Unified Emotionally Adaptive Spoken Dialogue System Using Self-Supervised Speech Representations","display_name":"EmoSDS: Unified Emotionally Adaptive Spoken Dialogue System Using Self-Supervised Speech Representations","publication_year":2025,"publication_date":"2025-03-25","ids":{"openalex":"https://openalex.org/W4408832887","doi":"https://doi.org/10.3390/fi17040143"},"language":"en","primary_location":{"id":"doi:10.3390/fi17040143","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040143","pdf_url":"https://www.mdpi.com/1999-5903/17/4/143/pdf?version=1742892814","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-5903/17/4/143/pdf?version=1742892814","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jaehwan Lee","orcid":"https://orcid.org/0009-0000-0873-375X"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaehwan Lee","raw_affiliation_strings":["Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0000-0873-375X","affiliations":[{"raw_affiliation_string":"Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065341552","display_name":"Youngjun Sim","orcid":null},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngjun Sim","raw_affiliation_strings":["Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-7986-1708","affiliations":[{"raw_affiliation_string":"Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053619502","display_name":"Jinyou Kim","orcid":"https://orcid.org/0009-0001-0554-2701"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jinyou Kim","raw_affiliation_strings":["Department of Computer Science and Engineering, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0001-0554-2701","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033502618","display_name":"Young-Joo Suh","orcid":"https://orcid.org/0000-0001-7208-1709"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Young-Joo Suh","raw_affiliation_strings":["Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-7208-1709","affiliations":[{"raw_affiliation_string":"Graduate School of Artificial Intelligence, Pohang University of Science and Technology (POSTECH), Pohang 37673, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5033502618"],"corresponding_institution_ids":["https://openalex.org/I123900574"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":5.7171,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95212213,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"17","issue":"4","first_page":"143","last_page":"143"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9125545024871826},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5580499172210693},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5003585815429688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.460746169090271}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9125545024871826},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5580499172210693},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5003585815429688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.460746169090271}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/fi17040143","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040143","pdf_url":"https://www.mdpi.com/1999-5903/17/4/143/pdf?version=1742892814","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jftint:v:17:y:2025:i:4:p:143-:d:1619746","is_oa":false,"landing_page_url":"https://www.mdpi.com/1999-5903/17/4/143/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:d4d80798ca9a4e6fbfadf591f88e5585","is_oa":true,"landing_page_url":"https://doaj.org/article/d4d80798ca9a4e6fbfadf591f88e5585","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Future Internet, Vol 17, Iss 4, p 143 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/fi17040143","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040143","pdf_url":"https://www.mdpi.com/1999-5903/17/4/143/pdf?version=1742892814","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2188307703","display_name":null,"funder_award_id":"2022R1A6A1A03052954","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G4306656980","display_name":null,"funder_award_id":"RS-2019-II191906","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G4358023396","display_name":null,"funder_award_id":"RS-2019-II191906","funder_id":"https://openalex.org/F4320335199","funder_display_name":"Korea Institute of Energy Technology Evaluation and Planning"},{"id":"https://openalex.org/G4467383673","display_name":null,"funder_award_id":"20214810100010","funder_id":"https://openalex.org/F4320335199","funder_display_name":"Korea Institute of Energy Technology Evaluation and Planning"},{"id":"https://openalex.org/G7208675597","display_name":null,"funder_award_id":"20214810100010","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G7776637999","display_name":null,"funder_award_id":"2022R1A6A1A03052954","funder_id":"https://openalex.org/F4320335199","funder_display_name":"Korea Institute of Energy Technology Evaluation and Planning"}],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320335199","display_name":"Korea Institute of Energy Technology Evaluation and Planning","ror":"https://ror.org/02zq38y32"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408832887.pdf"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2101105183","https://openalex.org/W3140429000","https://openalex.org/W3153025627","https://openalex.org/W3174716116","https://openalex.org/W3193377986","https://openalex.org/W3198429080","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4205742757","https://openalex.org/W4226380987","https://openalex.org/W4295308567","https://openalex.org/W4372260053","https://openalex.org/W4372260474","https://openalex.org/W4385823432","https://openalex.org/W4389524500","https://openalex.org/W4391021623","https://openalex.org/W4392902857","https://openalex.org/W4392931281","https://openalex.org/W4402683976","https://openalex.org/W4405709862","https://openalex.org/W6682631176","https://openalex.org/W6761205521"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"advancements":[3],"in":[4,28,165,176],"artificial":[5],"intelligence,":[6],"speech,":[7],"and":[8,58,98,120,127,148,168,187,193],"natural":[9],"language":[10],"processing":[11],"technology":[12],"have":[13],"enhanced":[14],"spoken":[15],"dialogue":[16],"systems":[17],"(SDSs),":[18],"enabling":[19],"natural,":[20],"voice-based":[21],"human\u2013computer":[22],"interaction.":[23],"However,":[24],"discrete,":[25],"token-based":[26],"LLMs":[27],"emotionally":[29],"adaptive":[30],"SDSs":[31],"focus":[32],"on":[33],"lexical":[34],"content":[35,119],"while":[36],"overlooking":[37],"essential":[38],"paralinguistic":[39,63,122],"cues":[40,189],"for":[41,52,190],"emotion":[42,48,99,154],"expression.":[43],"Existing":[44],"methods":[45],"use":[46],"external":[47],"predictors":[49],"to":[50,60,79,114,184],"compensate":[51],"this":[53],"but":[54],"introduce":[55],"computational":[56],"overhead":[57],"fail":[59],"fully":[61],"integrate":[62],"features":[64],"with":[65,139],"linguistic":[66,118],"context.":[67],"Moreover,":[68],"the":[69,112,181],"lack":[70],"of":[71],"high-quality":[72],"emotional":[73,82,125,140,146,166,186],"speech":[74,97],"datasets":[75],"limits":[76],"models\u2019":[77],"ability":[78,183],"learn":[80,115],"expressive":[81,192],"cues.":[83],"To":[84],"address":[85],"these":[86],"challenges,":[87],"we":[88,131],"propose":[89],"EmoSDS,":[90],"a":[91,134,149,172],"unified":[92],"SDS":[93],"framework":[94],"that":[95,160],"integrates":[96],"recognition":[100],"by":[101],"leveraging":[102],"self-supervised":[103],"learning":[104],"(SSL)":[105],"features.":[106],"Our":[107],"three-stage":[108],"training":[109],"pipeline":[110],"enables":[111],"LLM":[113],"both":[116],"discrete":[117],"continuous":[121],"features,":[123],"improving":[124],"expressiveness":[126],"response":[128,169],"naturalness.":[129],"Additionally,":[130],"construct":[132],"EmoSC,":[133],"dataset":[135],"combining":[136],"GPT-generated":[137],"dialogues":[138],"voice":[141],"conversion":[142],"data,":[143],"ensuring":[144],"greater":[145],"diversity":[147],"balanced":[150],"sample":[151],"distribution":[152],"across":[153],"categories.":[155],"The":[156],"experimental":[157],"results":[158],"show":[159],"EmoSDS":[161],"outperforms":[162],"existing":[163],"models":[164],"alignment":[167],"generation,":[170],"achieving":[171],"minimum":[173],"2.9%":[174],"increase":[175],"text":[177],"generation":[178],"metrics,":[179],"enhancing":[180],"LLM\u2019s":[182],"interpret":[185],"textual":[188],"more":[191],"contextually":[194],"appropriate":[195],"responses.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
