{"id":"https://openalex.org/W4402304233","doi":"https://doi.org/10.1109/ifuzzy63051.2024.10661368","title":"Fuzzy Expectation Maximization Phoneme Prediction in Diffusion Model-based Dysarthria Voice Conversion","display_name":"Fuzzy Expectation Maximization Phoneme Prediction in Diffusion Model-based Dysarthria Voice Conversion","publication_year":2024,"publication_date":"2024-08-10","ids":{"openalex":"https://openalex.org/W4402304233","doi":"https://doi.org/10.1109/ifuzzy63051.2024.10661368"},"language":"en","primary_location":{"id":"doi:10.1109/ifuzzy63051.2024.10661368","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ifuzzy63051.2024.10661368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Fuzzy Theory and Its Applications (iFUZZY)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114222291","display_name":"Guang\u2013Tao Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I91279580","display_name":"Chung Shan Medical University","ror":"https://ror.org/059ryjv25","country_code":"TW","type":"education","lineage":["https://openalex.org/I91279580"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Guang\u2013Tao Lin","raw_affiliation_strings":["Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan","institution_ids":["https://openalex.org/I91279580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113370756","display_name":"Wen\u2013Shin Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210104362","display_name":"Chung Shan Medical University Hospital","ror":"https://ror.org/01abtsn51","country_code":"TW","type":"funder","lineage":["https://openalex.org/I4210104362"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wen\u2013Shin Hsu","raw_affiliation_strings":["Chung Shan Medical University &#x0026; Chung Shan Medical University Hospital,Department of Medical Information &#x0026; Informatics Office Technology,Taichung,Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Shan Medical University &#x0026; Chung Shan Medical University Hospital,Department of Medical Information &#x0026; Informatics Office Technology,Taichung,Taiwan","institution_ids":["https://openalex.org/I4210104362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113370757","display_name":"Guan\u2013Tsen Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I91279580","display_name":"Chung Shan Medical University","ror":"https://ror.org/059ryjv25","country_code":"TW","type":"education","lineage":["https://openalex.org/I91279580"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Guan\u2013Tsen Liu","raw_affiliation_strings":["Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan","institution_ids":["https://openalex.org/I91279580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001883914","display_name":"Su\u2013Juan Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I91279580","display_name":"Chung Shan Medical University","ror":"https://ror.org/059ryjv25","country_code":"TW","type":"education","lineage":["https://openalex.org/I91279580"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Su\u2013Juan Chen","raw_affiliation_strings":["Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Shan Medical University,Department of Medical Information,Taichung,Taiwan","institution_ids":["https://openalex.org/I91279580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5114222291"],"corresponding_institution_ids":["https://openalex.org/I91279580"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12743319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"21","issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9695000052452087,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dysarthria","display_name":"Dysarthria","score":0.7717705965042114},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6685926914215088},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6674891710281372},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.501126766204834},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.4566596448421478},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4370481073856354},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40626075863838196},{"id":"https://openalex.org/keywords/audiology","display_name":"Audiology","score":0.14204350113868713},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13650771975517273},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.07143300771713257}],"concepts":[{"id":"https://openalex.org/C2777639682","wikidata":"https://www.wikidata.org/wiki/Q225957","display_name":"Dysarthria","level":2,"score":0.7717705965042114},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6685926914215088},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6674891710281372},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.501126766204834},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.4566596448421478},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4370481073856354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40626075863838196},{"id":"https://openalex.org/C548259974","wikidata":"https://www.wikidata.org/wiki/Q569965","display_name":"Audiology","level":1,"score":0.14204350113868713},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13650771975517273},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.07143300771713257},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ifuzzy63051.2024.10661368","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ifuzzy63051.2024.10661368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Fuzzy Theory and Its Applications (iFUZZY)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W98096188","https://openalex.org/W1489608363","https://openalex.org/W2073849744","https://openalex.org/W2127218421","https://openalex.org/W2170493694","https://openalex.org/W3082218567","https://openalex.org/W4200001025","https://openalex.org/W4382053231","https://openalex.org/W4385822966","https://openalex.org/W6629092883"],"related_works":["https://openalex.org/W2331173358","https://openalex.org/W2969484279","https://openalex.org/W3160456149","https://openalex.org/W2168872498","https://openalex.org/W4400873482","https://openalex.org/W4239608382","https://openalex.org/W2329762060","https://openalex.org/W2517018229","https://openalex.org/W2351669973","https://openalex.org/W2312964388"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"proposed":[3,54,182,196],"an":[4],"effective":[5],"fuzzy":[6],"expectation-maximization":[7],"phoneme":[8],"prediction":[9],"method":[10,55,73,183,197],"in":[11,74,78,111,175],"diffusion":[12,100,114,118,128],"model-based":[13,48,101],"dysarthria":[14,49,109,121,132,173,179,189],"voice":[15,50,133,190],"conversion":[16,51,96,191],"(FEMPPDM-DVC)":[17],"which":[18,103],"is":[19,56,64,98,142,152,164,184],"accessible":[20],"to":[21,108,130,134,144,148,166],"(i)":[22],"training":[23],"without":[24],"parallel":[25],"data":[26,35],"(ii)":[27],"converting":[28],"a":[29,71,99,126,139],"longer":[30],"duration":[31],"of":[32,172,178],"the":[33,53,82,91,112,120,155,168,195,199],"audio":[34],"(iii)":[36],"preserves":[37],"speaker":[38],"identity.":[39],"By":[40],"integrating":[41],"Fuzzy":[42],"Expectation-Maximization":[43],"(FEM)":[44],"clustering":[45,93],"and":[46,84,170],"Diffusion":[47],"approach,":[52],"able":[57,165],"gradually":[58],"generate":[59],"normal":[60,106,135],"utterances.":[61],"Feature":[62],"extraction":[63,77],"performed":[65],"using":[66,90],"Mel-frequency":[67],"cepstral":[68],"coefficients":[69],"(MFCCs),":[70],"robust":[72],"acoustic":[75],"feature":[76],"Text-to-Speech":[79],"systems.":[80],"Ensures":[81],"effectiveness":[83],"accuracy,":[85],"through":[86,125],"repeated":[87],"parameter":[88],"adjustment":[89],"FEM":[92],"algorithm.":[94],"The":[95,181],"network":[97],"structure,":[102],"can":[104],"convert":[105,131,145],"utterances":[107,110,174],"forward":[113,117],"process.":[115],"After":[116],"process,":[119],"utterance":[122],"will":[123],"go":[124],"reverse":[127],"process":[129],"utterance.":[136],"Once":[137],"converted,":[138],"GAN-based":[140],"vocoder":[141],"applied":[143],"mel-frequency":[146],"spectrogram":[147],"waveform.":[149],"Objective":[150],"evaluation":[151],"conducted":[153],"on":[154],"Saarbr\u00fccken":[156],"Voice":[157],"Database":[158],"(SVD)":[159],"dataset":[160],"show":[161,193],"that":[162,194],"FEMDDPM-DVC":[163],"improve":[167],"intelligibility":[169],"naturalness":[171],"15":[176],"kinds":[177],"voice.":[180],"compared":[185],"with":[186],"five":[187],"other":[188,203],"methods,":[192],"has":[198],"most":[200],"performance":[201],"among":[202],"method.":[204]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
