{"id":"https://openalex.org/W4411599582","doi":"https://doi.org/10.1109/iotm.001.2500013","title":"Mandarin Electrolaryngeal Speech Voice Conversion with Speech Encoder Loss Learning and Seq2seq Modeling","display_name":"Mandarin Electrolaryngeal Speech Voice Conversion with Speech Encoder Loss Learning and Seq2seq Modeling","publication_year":2025,"publication_date":"2025-06-24","ids":{"openalex":"https://openalex.org/W4411599582","doi":"https://doi.org/10.1109/iotm.001.2500013"},"language":"en","primary_location":{"id":"doi:10.1109/iotm.001.2500013","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iotm.001.2500013","pdf_url":null,"source":{"id":"https://openalex.org/S4210201254","display_name":"IEEE Internet of Things Magazine","issn_l":"2576-3180","issn":["2576-3180","2576-3199"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113786479","display_name":"Ming-Chi Yen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Ming-Chi Yen","raw_affiliation_strings":["Institute of Information Science, Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024882790","display_name":"Chia-Hua Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chia-Hua Wu","raw_affiliation_strings":["Institute of Information Science, Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082770847","display_name":"Shu-Wei Tsai","orcid":"https://orcid.org/0000-0001-8335-9825"},"institutions":[{"id":"https://openalex.org/I4210158999","display_name":"National Cheng Kung University Hospital","ror":"https://ror.org/04zx3rq17","country_code":"TW","type":"healthcare","lineage":["https://openalex.org/I4210158999"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Shu-Wei Tsai","raw_affiliation_strings":["National Cheng Kung University Hospital,Department of Otolaryngology Head and Neck Surgery,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Cheng Kung University Hospital,Department of Otolaryngology Head and Neck Surgery,Taiwan","institution_ids":["https://openalex.org/I4210158999"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073451247","display_name":"Jyh\u2010Shing Roger Jang","orcid":"https://orcid.org/0000-0002-7319-9095"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jyh-Shing Roger Jang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Institute of Information Science, Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062211930","display_name":"Amir Hussain","orcid":"https://orcid.org/0000-0002-8080-082X"},"institutions":[{"id":"https://openalex.org/I251738","display_name":"Edinburgh Napier University","ror":"https://ror.org/03zjvnn91","country_code":"GB","type":"education","lineage":["https://openalex.org/I251738"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Amir Hussain","raw_affiliation_strings":["School of Computing, Edinburgh Napier University,UK"],"affiliations":[{"raw_affiliation_string":"School of Computing, Edinburgh Napier University,UK","institution_ids":["https://openalex.org/I251738"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071214181","display_name":"Hsin\u2010Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5113786479"],"corresponding_institution_ids":["https://openalex.org/I4210098366"],"apc_list":null,"apc_paid":null,"fwci":2.8599,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91543998,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"8","issue":"4","first_page":"22","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.8791084289550781},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7348982095718384},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6160334348678589},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6029012203216553},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4475265443325043},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3187037706375122}],"concepts":[{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.8791084289550781},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7348982095718384},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6160334348678589},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6029012203216553},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4475265443325043},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3187037706375122},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iotm.001.2500013","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iotm.001.2500013","pdf_url":null,"source":{"id":"https://openalex.org/S4210201254","display_name":"IEEE Internet of Things Magazine","issn_l":"2576-3180","issn":["2576-3180","2576-3199"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Magazine","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2022125261","https://openalex.org/W2048646122","https://openalex.org/W2128160875","https://openalex.org/W2168510624","https://openalex.org/W2902967674","https://openalex.org/W3016160783","https://openalex.org/W3096567388","https://openalex.org/W4206319965","https://openalex.org/W4210590938","https://openalex.org/W4214567298","https://openalex.org/W4319862397","https://openalex.org/W4385822466","https://openalex.org/W6681377254"],"related_works":["https://openalex.org/W2990005675","https://openalex.org/W2374317326","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W2980645678"],"abstract_inverted_index":{"Electrolaryngeal":[0],"(EL)":[1],"speech":[2,19,31,77,112,128,151,162,170,199,214],"utilizes":[3],"excitation":[4],"signals":[5],"generated":[6],"by":[7,114],"an":[8],"electrolarynx":[9],"instead":[10],"of":[11,51,98,121,144,177],"human":[12],"vocal":[13],"vibrations.":[14],"In":[15,133],"daily":[16],"communication,":[17],"EL":[18,52,76,110,126],"is":[20,67,74],"less":[21],"natural":[22,29],"and":[23,37,49,58,71,100,163,179,218],"more":[24],"difficult":[25],"to":[26,33,45,140,156,173,188,200,222],"understand":[27],"than":[28],"(NL)":[30],"due":[32],"mechanical":[34],"vibration":[35],"noise":[36],"fixed":[38],"pitch.":[39],"Different":[40],"methods":[41],"have":[42],"been":[43,87,131],"proposed":[44,88],"improve":[46],"the":[47,72,96,119,142,153,158,175,212,223],"quality":[48],"intelligibility":[50],"speech,":[53],"but":[54],"limited":[55],"training":[56,191],"data":[57],"atypical":[59],"acoustic":[60],"characteristics":[61],"pose":[62],"challenges.":[63],"Voice":[64],"conversion":[65],"(VC)":[66],"one":[68],"popular":[69],"method,":[70],"task":[73],"called":[75],"VC":[78],"(ELVC).":[79],"Sequence-to-sequence":[80],"(seq2seq)":[81],"modeling":[82],"with":[83],"pretraining":[84,154],"strategies":[85],"has":[86,107,129],"for":[89],"ELVC.":[90],"However,":[91],"seq2seq":[92,122],"ELVC":[93,123,145],"still":[94],"faces":[95],"problem":[97,176],"incomplete":[99,178],"missing":[101,180],"phonemes.":[102,181],"Furthermore,":[103],"although":[104],"previous":[105],"work":[106],"evaluated":[108],"simulated":[109],"(sEL)":[111],"produced":[113],"healthy":[115],"speakers":[116],"using":[117],"electrolarynxes,":[118],"effectiveness":[120],"on":[124,197],"patient":[125],"(pEL)":[127],"not":[130],"studied.":[132],"this":[134],"article,":[135],"we":[136,148,167,183],"propose":[137],"three":[138],"approaches":[139,210],"address":[141],"issues":[143],"implementation.":[146],"First,":[147],"utilize":[149],"sEL":[150],"in":[152,215],"stage":[155],"close":[157],"gap":[159],"between":[160],"pEL":[161,190,198],"NL":[164],"speech.":[165,192],"Second,":[166],"adopt":[168],"a":[169],"encoder":[171],"loss":[172],"solve":[174],"Third,":[182],"introduce":[184],"waveform":[185],"similarity":[186],"overlap-and-add":[187],"augment":[189],"We":[193],"conduct":[194],"systematic":[195],"experiments":[196],"evaluate":[201],"our":[202,209],"approaches.":[203],"Ablation":[204],"studies":[205],"show":[206],"that":[207],"incorporating":[208],"improves":[211],"converted":[213],"both":[216],"objective":[217],"subjective":[219],"evaluations":[220],"compared":[221],"baseline":[224],"model.":[225]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
