{"id":"https://openalex.org/W3094025065","doi":"https://doi.org/10.1109/sped53181.2021.9587434","title":"Effects of F0 Estimation Algorithms on Ultrasound-Based Silent Speech Interfaces","display_name":"Effects of F0 Estimation Algorithms on Ultrasound-Based Silent Speech Interfaces","publication_year":2021,"publication_date":"2021-10-13","ids":{"openalex":"https://openalex.org/W3094025065","doi":"https://doi.org/10.1109/sped53181.2021.9587434","mag":"3094025065"},"language":"en","primary_location":{"id":"doi:10.1109/sped53181.2021.9587434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped53181.2021.9587434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074142586","display_name":"Pengyu Dai","orcid":"https://orcid.org/0009-0009-6014-0041"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":true,"raw_author_name":"Pengyu Dai","raw_affiliation_strings":["Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086670617","display_name":"Mohammed Salah Al-Radhi","orcid":"https://orcid.org/0000-0003-3094-6916"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Mohammed Salah Al-Radhi","raw_affiliation_strings":["Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016031960","display_name":"Tam\u00e1s G\u00e1bor Csap\u00f3","orcid":"https://orcid.org/0000-0003-4375-7524"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Tamas Gabor Csapo","raw_affiliation_strings":["Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5074142586"],"corresponding_institution_ids":["https://openalex.org/I29770179"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00581474,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"2002","issue":null,"first_page":"47","last_page":"51"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7184606790542603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7064351439476013},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.6945641040802002},{"id":"https://openalex.org/keywords/fundamental-frequency","display_name":"Fundamental frequency","score":0.5473227500915527},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5333983898162842},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5238991975784302},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49708274006843567},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4329940974712372},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.41164523363113403},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38306689262390137},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34765389561653137},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.28377270698547363},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.250240296125412}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7184606790542603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7064351439476013},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.6945641040802002},{"id":"https://openalex.org/C10513763","wikidata":"https://www.wikidata.org/wiki/Q1331774","display_name":"Fundamental frequency","level":2,"score":0.5473227500915527},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5333983898162842},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5238991975784302},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49708274006843567},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4329940974712372},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.41164523363113403},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38306689262390137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34765389561653137},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28377270698547363},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.250240296125412},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sped53181.2021.9587434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped53181.2021.9587434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1546892833","https://openalex.org/W1893870629","https://openalex.org/W1980988001","https://openalex.org/W2008120082","https://openalex.org/W2013139519","https://openalex.org/W2091425152","https://openalex.org/W2096429026","https://openalex.org/W2143929858","https://openalex.org/W2145442746","https://openalex.org/W2145892079","https://openalex.org/W2161900370","https://openalex.org/W2291998724","https://openalex.org/W2294901616","https://openalex.org/W2402091997","https://openalex.org/W2405674977","https://openalex.org/W2508425411","https://openalex.org/W2515755543","https://openalex.org/W2516001803","https://openalex.org/W2554625447","https://openalex.org/W2626848556","https://openalex.org/W2745751587","https://openalex.org/W2746109435","https://openalex.org/W2889853672","https://openalex.org/W2891836073","https://openalex.org/W2949893096","https://openalex.org/W2972959620","https://openalex.org/W2991648531","https://openalex.org/W6713998602","https://openalex.org/W6754356477"],"related_works":["https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097","https://openalex.org/W4385672897","https://openalex.org/W2373675101","https://openalex.org/W106160982","https://openalex.org/W2359140082","https://openalex.org/W2074132948","https://openalex.org/W1975651710","https://openalex.org/W1494715773"],"abstract_inverted_index":{"This":[0],"paper":[1],"shows":[2],"recent":[3],"Silent":[4],"Speech":[5],"Interface":[6],"(SSI)":[7],"progress":[8],"that":[9,87,121],"translates":[10],"tongue":[11],"motions":[12],"into":[13],"audible":[14],"speech.":[15],"In":[16],"our":[17],"previous":[18],"work":[19],"and":[20,72,134],"also":[21],"in":[22,105,131],"the":[23,26,58,65,99,112],"current":[24],"study,":[25],"prediction":[27],"of":[28,60],"fundamental":[29],"frequency":[30],"(F0)":[31],"from":[32],"Ultra-Sound":[33],"Tongue":[34],"Images":[35],"(UTI)":[36],"was":[37],"achieved":[38],"using":[39,77],"articulatory-to-acoustic":[40,100],"mapping":[41,101],"methods":[42],"based":[43],"on":[44],"deep":[45,78],"learning.":[46],"Here":[47],"we":[48],"investigated":[49],"several":[50],"traditional":[51],"discontinuous":[52,89,122],"speech-based":[53],"F0":[54,90,115],"estimation":[55],"algorithms":[56,91,123],"for":[57],"target":[59],"UTI-based":[61],"SSI":[62],"system.":[63],"Besides,":[64],"vocoder":[66],"parameters":[67],"(F0,":[68],"Maximum":[69],"Voiced":[70],"Frequency":[71],"Mel-Generalized":[73],"Cepstrum)":[74],"are":[75,92,126],"predicted":[76,93],"neural":[79],"networks,":[80],"with":[81,94],"UTI":[82],"as":[83],"input.":[84],"We":[85],"found":[86],"those":[88],"a":[95],"lower":[96],"error":[97],"during":[98],"experiments.":[102],"They":[103],"result":[104],"slightly":[106],"more":[107],"natural":[108],"synthesized":[109],"speech":[110,130],"than":[111],"baseline":[113],"continuous":[114],"algorithm.":[116],"Moreover,":[117],"experimental":[118],"results":[119],"confirmed":[120],"(e.g.":[124],"Yin)":[125],"closest":[127],"to":[128],"original":[129],"objective":[132],"metrics":[133],"subjective":[135],"listening":[136],"test.":[137]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
