{"id":"https://openalex.org/W4392904084","doi":"https://doi.org/10.1109/icassp48485.2024.10446886","title":"Creating Personalized Synthetic Voices from Articulation Impaired Speech Using Augmented Reconstruction Loss","display_name":"Creating Personalized Synthetic Voices from Articulation Impaired Speech Using Augmented Reconstruction Loss","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904084","doi":"https://doi.org/10.1109/icassp48485.2024.10446886"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446886","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002748018","display_name":"Yusheng Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yusheng Tian","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101527946","display_name":"Jingyu Li","orcid":"https://orcid.org/0000-0002-1163-4557"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyu Li","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001795601","display_name":"Tan Lee","orcid":"https://orcid.org/0000-0002-7089-3436"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tan Lee","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong SAR","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5002748018"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02614761,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":null,"first_page":"11501","last_page":"11505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/articulation","display_name":"Articulation (sociology)","score":0.822820246219635},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7028498649597168},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6922265887260437},{"id":"https://openalex.org/keywords/manner-of-articulation","display_name":"Manner of articulation","score":0.55641108751297},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.52593594789505},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.4425680339336395},{"id":"https://openalex.org/keywords/tongue","display_name":"Tongue","score":0.43187129497528076},{"id":"https://openalex.org/keywords/place-of-articulation","display_name":"Place of articulation","score":0.41152259707450867},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1059349775314331}],"concepts":[{"id":"https://openalex.org/C2779337067","wikidata":"https://www.wikidata.org/wiki/Q4800961","display_name":"Articulation (sociology)","level":3,"score":0.822820246219635},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7028498649597168},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6922265887260437},{"id":"https://openalex.org/C66029223","wikidata":"https://www.wikidata.org/wiki/Q210847","display_name":"Manner of articulation","level":2,"score":0.55641108751297},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.52593594789505},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.4425680339336395},{"id":"https://openalex.org/C2779744641","wikidata":"https://www.wikidata.org/wiki/Q9614","display_name":"Tongue","level":2,"score":0.43187129497528076},{"id":"https://openalex.org/C36933642","wikidata":"https://www.wikidata.org/wiki/Q214090","display_name":"Place of articulation","level":4,"score":0.41152259707450867},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1059349775314331},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C2778203577","wikidata":"https://www.wikidata.org/wiki/Q38035","display_name":"Consonant","level":3,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446886","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1514928307","https://openalex.org/W2005768155","https://openalex.org/W2034841618","https://openalex.org/W2069221573","https://openalex.org/W2070355889","https://openalex.org/W2072563337","https://openalex.org/W2271865536","https://openalex.org/W2396988173","https://openalex.org/W2593073653","https://openalex.org/W2608036320","https://openalex.org/W2920934251","https://openalex.org/W2964243274","https://openalex.org/W2973215447","https://openalex.org/W3042609801","https://openalex.org/W3094856303","https://openalex.org/W3097538987","https://openalex.org/W3160618567","https://openalex.org/W4313269328","https://openalex.org/W4367281575","https://openalex.org/W4377195654","https://openalex.org/W4385822458","https://openalex.org/W6677082149","https://openalex.org/W6678280073","https://openalex.org/W6750523955","https://openalex.org/W6768332837","https://openalex.org/W6783867762","https://openalex.org/W6803547063"],"related_works":["https://openalex.org/W2022221551","https://openalex.org/W1037709911","https://openalex.org/W4231989107","https://openalex.org/W2063579271","https://openalex.org/W2748448327","https://openalex.org/W4388123402","https://openalex.org/W2025106137","https://openalex.org/W4388123242","https://openalex.org/W2038007203","https://openalex.org/W2054296603"],"abstract_inverted_index":{"This":[0,59],"research":[1],"is":[2,17,33,60,105],"about":[3],"the":[4,39,45,53,74,93,99,114,153,167],"creation":[5],"of":[6,51,65,95,127,147],"personalized":[7],"synthetic":[8,154],"voices":[9],"for":[10],"head":[11],"and":[12,56,129],"neck":[13],"cancer":[14,22,149],"survivors.":[15],"It":[16],"focused":[18],"particularly":[19],"on":[20,143],"tongue":[21,148],"patients":[23],"whose":[24],"speech":[25,77],"might":[26],"exhibit":[27],"severe":[28],"articulation":[29,37,97,112,125,158],"impairment.":[30],"Our":[31],"goal":[32],"to":[34,72,160],"restore":[35],"normal":[36],"in":[38,49,98,113],"synthesized":[40],"speech,":[41,131,163],"while":[42,164],"maximally":[43],"preserving":[44],"target":[46,168],"speaker\u2019s":[47,169],"individuality":[48],"terms":[50,120],"both":[52],"voice":[54,155],"timbre":[55],"speaking":[57],"style.":[58],"formulated":[61],"as":[62],"a":[63,88,106,136,144],"task":[64],"learning":[66],"from":[67,123],"noisy":[68],"labels.":[69],"We":[70],"propose":[71],"augment":[73],"commonly":[75],"used":[76],"reconstruction":[78],"loss":[79,90,108,119],"with":[80],"two":[81],"additional":[82,118],"terms.":[83],"The":[84,102],"first":[85],"term":[86,104],"constitutes":[87],"regularization":[89],"that":[91,109,152],"mitigates":[92],"impact":[94],"distorted":[96],"training":[100],"speech.":[101,116],"second":[103],"consistency":[107],"encourages":[110],"correct":[111],"generated":[115,130],"These":[117],"are":[121,133,173],"obtained":[122],"frame-level":[124],"scores":[126],"original":[128],"which":[132],"derived":[134],"using":[135],"separately":[137],"trained":[138],"phone":[139],"classifier.":[140],"Experimental":[141],"results":[142],"real":[145],"case":[146],"patient":[150],"confirm":[151],"achieves":[156],"comparable":[157],"quality":[159],"unimpaired":[161],"natural":[162],"effectively":[165],"maintaining":[166],"individuality.":[170],"Audio":[171],"samples":[172],"available":[174],"at":[175],"https://myspeechproject.github.io/ArticulationRepair/.":[176]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
