{"id":"https://openalex.org/W4386702815","doi":"https://doi.org/10.1109/taslp.2023.3313427","title":"$F0$ Estimation and Voicing Detection With Cascade Architecture in Noisy Speech","display_name":"$F0$ Estimation and Voicing Detection With Cascade Architecture in Noisy Speech","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386702815","doi":"https://doi.org/10.1109/taslp.2023.3313427","pmid":"https://pubmed.ncbi.nlm.nih.gov/40322544"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3313427","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3313427","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12048035","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034640924","display_name":"Yixuan Zhang","orcid":"https://orcid.org/0000-0002-1343-2713"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yixuan Zhang","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, OH, USA"],"raw_orcid":"https://orcid.org/0000-0002-1343-2713","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101497068","display_name":"Heming Wang","orcid":"https://orcid.org/0000-0003-3057-9060"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heming Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, OH, USA"],"raw_orcid":"https://orcid.org/0000-0003-3057-9060","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051837453","display_name":"DeLiang Wang","orcid":"https://orcid.org/0000-0001-8195-6319"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"DeLiang Wang","raw_affiliation_strings":["Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, Ohio State University, Columbus, OH, USA"],"raw_orcid":"https://orcid.org/0000-0001-8195-6319","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5034640924"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.7676,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.7096874,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"31","issue":null,"first_page":"3760","last_page":"3770"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/voice","display_name":"Voice","score":0.8581904172897339},{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.7787768840789795},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7360881567001343},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6882386207580566},{"id":"https://openalex.org/keywords/pitch-detection-algorithm","display_name":"Pitch detection algorithm","score":0.6511659622192383},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5648949146270752},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5331071615219116},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4661620259284973},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32159650325775146},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.22339984774589539},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09781104326248169}],"concepts":[{"id":"https://openalex.org/C552089266","wikidata":"https://www.wikidata.org/wiki/Q494510","display_name":"Voice","level":2,"score":0.8581904172897339},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.7787768840789795},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7360881567001343},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6882386207580566},{"id":"https://openalex.org/C135622632","wikidata":"https://www.wikidata.org/wiki/Q7198851","display_name":"Pitch detection algorithm","level":3,"score":0.6511659622192383},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5648949146270752},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5331071615219116},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4661620259284973},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32159650325775146},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.22339984774589539},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09781104326248169},{"id":"https://openalex.org/C42360764","wikidata":"https://www.wikidata.org/wiki/Q83588","display_name":"Chemical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2023.3313427","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3313427","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmid:40322544","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40322544","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM transactions on audio, speech, and language processing","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:12048035","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12048035","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:12048035","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12048035","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"},"sustainable_development_goals":[{"score":0.8399999737739563,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G4332130492","display_name":null,"funder_award_id":"R01DC012048","funder_id":"https://openalex.org/F4320337352","funder_display_name":"National Institute on Deafness and Other Communication Disorders"}],"funders":[{"id":"https://openalex.org/F4320317189","display_name":"Ohio Supercomputer Center","ror":"https://ror.org/01apna436"},{"id":"https://openalex.org/F4320337352","display_name":"National Institute on Deafness and Other Communication Disorders","ror":"https://ror.org/04mhx6838"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W169745891","https://openalex.org/W1494198834","https://openalex.org/W1886421314","https://openalex.org/W1974387177","https://openalex.org/W1990449877","https://openalex.org/W2012086895","https://openalex.org/W2013312573","https://openalex.org/W2034300186","https://openalex.org/W2052252043","https://openalex.org/W2089580701","https://openalex.org/W2091425152","https://openalex.org/W2094130157","https://openalex.org/W2115129089","https://openalex.org/W2119599673","https://openalex.org/W2130086727","https://openalex.org/W2291877678","https://openalex.org/W2471520273","https://openalex.org/W2788372077","https://openalex.org/W2803417661","https://openalex.org/W2884436604","https://openalex.org/W2889992917","https://openalex.org/W2962866891","https://openalex.org/W2972460025","https://openalex.org/W2972654834","https://openalex.org/W2986673441","https://openalex.org/W2991361823","https://openalex.org/W3016067707","https://openalex.org/W3096235870","https://openalex.org/W3096408984","https://openalex.org/W3161042404","https://openalex.org/W3161758688","https://openalex.org/W3162109944","https://openalex.org/W3165227983","https://openalex.org/W3196611890","https://openalex.org/W3201698955","https://openalex.org/W4205664022","https://openalex.org/W4297841560","https://openalex.org/W4318751639","https://openalex.org/W6631190155","https://openalex.org/W6633684334","https://openalex.org/W6676245417","https://openalex.org/W6731370813","https://openalex.org/W6746960179","https://openalex.org/W6748228140","https://openalex.org/W6784335252","https://openalex.org/W6849446024"],"related_works":["https://openalex.org/W2117854015","https://openalex.org/W2044564457","https://openalex.org/W1548784694","https://openalex.org/W2120771489","https://openalex.org/W2111139066","https://openalex.org/W2051376034","https://openalex.org/W2149528081","https://openalex.org/W2955597484","https://openalex.org/W3110551121","https://openalex.org/W1990055248"],"abstract_inverted_index":{"As":[0],"a":[1,67,83,88,96],"fundamental":[2],"problem":[3],"in":[4,25,40,95,118],"speech":[5,27,84],"processing,":[6],"pitch":[7,23,62,74,89,111,163],"tracking":[8,24,90,164],"has":[9,17],"been":[10,18],"studied":[11],"for":[12,61],"decades.":[13],"While":[14],"strong":[15],"performance":[16,140],"achieved":[19],"on":[20],"clean":[21],"speech,":[22],"noisy":[26],"is":[28,93,102,142,152],"still":[29],"challenging.":[30],"Severe":[31],"non-stationary":[32],"noises":[33],"not":[34],"only":[35],"corrupt":[36],"the":[37,50,56,106,134,149],"harmonic":[38],"structure":[39],"voiced":[41,53],"intervals":[42],"but":[43],"also":[44],"make":[45],"it":[46],"difficult":[47],"to":[48,154],"determine":[49],"existence":[51],"of":[52,58],"speech.":[54],"Given":[55],"importance":[57],"voicing":[59,77,114],"detection":[60,115],"tracking,":[63],"this":[64],"study":[65],"proposes":[66],"neural":[68],"cascade":[69,80],"architecture":[70,81],"that":[71,104,129,148],"jointly":[72],"performs":[73],"estimation":[75,112],"and":[76,87,92,98,113,141,158],"detection.":[78],"The":[79],"optimizes":[82],"enhancement":[85,107],"module":[86,108],"module,":[91],"trained":[94],"speaker-independent":[97],"noise-independent":[99],"way.":[100],"It":[101],"observed":[103],"incorporating":[105],"improves":[109],"both":[110],"accuracy,":[116],"especially":[117],"low":[119],"signal-to-noise":[120],"ratio":[121],"(SNR)":[122],"conditions.":[123],"In":[124],"addition,":[125],"compared":[126],"with":[127],"frameworks":[128],"combine":[130],"corresponding":[131],"single-task":[132],"models,":[133],"proposed":[135,150],"multi-task":[136],"framework":[137],"achieves":[138],"better":[139],"more":[143],"efficient.":[144],"Experimental":[145],"results":[146],"show":[147],"method":[151],"robust":[153],"different":[155],"noise":[156],"conditions":[157],"substantially":[159],"outperforms":[160],"other":[161],"competitive":[162],"methods.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-09-14T00:00:00"}
