{"id":"https://openalex.org/W4392405784","doi":"https://doi.org/10.1109/taslp.2024.3372874","title":"Articulatory Copy Synthesis Based on the Speech Synthesizer VocalTractLab and Convolutional Recurrent Neural Networks","display_name":"Articulatory Copy Synthesis Based on the Speech Synthesizer VocalTractLab and Convolutional Recurrent Neural Networks","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392405784","doi":"https://doi.org/10.1109/taslp.2024.3372874"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3372874","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3372874","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033063238","display_name":"Yingming Gao","orcid":"https://orcid.org/0000-0001-5881-3723"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yingming Gao","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046141664","display_name":"Peter Birkholz","orcid":"https://orcid.org/0000-0003-0167-8123"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peter Birkholz","raw_affiliation_strings":["Institute of Acoustics and Speech Communication, Technische Universit&#x00E4;t Dresden, Dresden, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics and Speech Communication, Technische Universit&#x00E4;t Dresden, Dresden, Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100343662","display_name":"Ya Li","orcid":"https://orcid.org/0000-0002-6284-5039"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ya Li","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5033063238"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":1.0336,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78739807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"1845","last_page":"1858"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.7568916082382202},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7384597063064575},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7307158708572388},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6111114025115967},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4785071909427643},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4324757754802704},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.41183164715766907},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2853735089302063}],"concepts":[{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.7568916082382202},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7384597063064575},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7307158708572388},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6111114025115967},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4785071909427643},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4324757754802704},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.41183164715766907},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2853735089302063},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3372874","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3372874","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2217149964","display_name":null,"funder_award_id":"2023RC73","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G2249315591","display_name":null,"funder_award_id":"62271083","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7136399700","display_name":null,"funder_award_id":"2023RC13","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W623495177","https://openalex.org/W1522301498","https://openalex.org/W1525807100","https://openalex.org/W1531956331","https://openalex.org/W1963854000","https://openalex.org/W1965378753","https://openalex.org/W2011845089","https://openalex.org/W2023728986","https://openalex.org/W2025877346","https://openalex.org/W2026149420","https://openalex.org/W2027297142","https://openalex.org/W2055115647","https://openalex.org/W2057649128","https://openalex.org/W2068447135","https://openalex.org/W2110095567","https://openalex.org/W2115730616","https://openalex.org/W2123778414","https://openalex.org/W2231075402","https://openalex.org/W2402610474","https://openalex.org/W2406504561","https://openalex.org/W2466021321","https://openalex.org/W2563517939","https://openalex.org/W2610175270","https://openalex.org/W2622158094","https://openalex.org/W2728212668","https://openalex.org/W2972425221","https://openalex.org/W2972537746","https://openalex.org/W2972552159","https://openalex.org/W3025683731","https://openalex.org/W3027869639","https://openalex.org/W3037959216","https://openalex.org/W3186617362","https://openalex.org/W3216139160","https://openalex.org/W6607631539","https://openalex.org/W6631190155","https://openalex.org/W6632433047","https://openalex.org/W6685662889","https://openalex.org/W6777941750","https://openalex.org/W6779985876","https://openalex.org/W6799101400","https://openalex.org/W6804363299"],"related_works":["https://openalex.org/W2990005675","https://openalex.org/W2374317326","https://openalex.org/W153239700","https://openalex.org/W2067459736","https://openalex.org/W2020989338","https://openalex.org/W2105635394","https://openalex.org/W1823617068","https://openalex.org/W2147126679","https://openalex.org/W4300049944","https://openalex.org/W2115039802"],"abstract_inverted_index":{"Articulatory":[0],"copy":[1],"synthesis":[2],"(ACS)":[3],"refers":[4],"to":[5,104,186,218],"the":[6,18,29,43,109,116,123,129,134,146,156,172,205,226,239],"synthetic":[7,87,99,135],"reproduction":[8],"of":[9,15,20,31,108,133,149,169,202,232],"natural":[10,95],"utterances.":[11,136,215],"The":[12,79],"existing":[13],"methods":[14,140],"ACS":[16,39,66,174,195],"have":[17],"limitations":[19],"poor":[21],"generalizability":[22],"for":[23,82,212],"unknown":[24],"speakers,":[25],"high":[26],"computing":[27],"costs,":[28],"lack":[30],"systematic":[32],"evaluation,":[33],"etc.":[34],"Here":[35],"we":[36],"propose":[37],"an":[38,198],"method":[40,196],"based":[41,144,154],"on":[42,85,145,155],"articulatory":[44,72,100,110,150,178,210],"speech":[45,96,228,241],"synthesizer":[46],"VocalTractLab":[47],"(VTL)":[48],"and":[49,62,71,76,97,111,128,152,161,207,222,235],"convolutional":[50],"recurrent":[51],"neural":[52],"networks.":[53],"We":[54],"first":[55],"created":[56],"paired":[57],"articulatory-acoustic":[58],"samples":[59,118],"using":[60,238],"VTL,":[61],"then":[63,182],"trained":[64,173],"neural-network-based":[65],"models":[67,175],"with":[68,94],"acoustic":[69,112,157,163],"features":[70],"trajectories":[73,151,179],"as":[74,106,114],"inputs":[75],"outputs,":[77],"respectively.":[78],"basic":[80],"approach":[81],"training":[83,88,117],"relied":[84],"fully":[86],"data":[89],"(and":[90],"was":[91],"later":[92],"supplemented":[93],"corresponding":[98],"data).":[101],"In":[102],"addition,":[103],"represent":[105],"much":[107],"space":[113],"possible,":[115],"were":[119,141,181],"augmented":[120],"by":[121],"varying":[122],"phonation":[124],"type,":[125],"speaking":[126],"effort,":[127],"vocal":[130],"tract":[131],"length":[132],"Furthermore,":[137],"two":[138],"regularization":[139],"proposed:":[142],"one":[143],"smoothness":[147],"loss":[148,158],"another":[153],"between":[159,204],"original":[160],"estimated":[162,208],"features.":[164],"For":[165],"given":[166],"new":[167,188],"utterances":[168],"arbitrary":[170],"length,":[171],"could":[176],"estimate":[177],"that":[180,192],"fed":[183],"into":[184],"VTL":[185,209],"synthesize":[187],"speech.":[189],"Experiments":[190],"showed":[191],"our":[193],"proposed":[194],"achieved":[197,229],"average":[199],"correlation":[200],"coefficient":[201],"0.983":[203],"reference":[206],"parameters":[211],"speaker-dependent":[213],"German":[214],"When":[216],"applied":[217],"speaker-independent":[219],"German,":[220],"English,":[221],"Mandarin":[223],"Chinese":[224],"utterances,":[225],"copy-synthesized":[227],"recognition":[230],"rates":[231],"73.88%,":[233],"52.92%,":[234],"52.41%,":[236],"respectively,":[237],"automatic":[240],"recognizer":[242],"Google":[243],"Speech-to-Text.":[244]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
