{"id":"https://openalex.org/W4405710119","doi":"https://doi.org/10.1109/iscslp63861.2024.10800253","title":"Wav2Nas: An Exploratory Approach to Nasalance Estimation in Speech","display_name":"Wav2Nas: An Exploratory Approach to Nasalance Estimation in Speech","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405710119","doi":"https://doi.org/10.1109/iscslp63861.2024.10800253"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800253","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800253","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035671169","display_name":"Rui Feng","orcid":"https://orcid.org/0000-0003-3345-5733"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Feng","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106540512","display_name":"Yuang Chen","orcid":"https://orcid.org/0000-0001-6892-3205"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu-Ang Chen","raw_affiliation_strings":["University of Science and Technology of China,Department of Electronic Engineering and Information Science,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Electronic Engineering and Information Science,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051501323","display_name":"Yin-Long Liu","orcid":"https://orcid.org/0009-0004-0380-0836"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yin-Long Liu","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043406907","display_name":"Jiahong Yuan","orcid":"https://orcid.org/0009-0008-2162-7167"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia-Hong Yuan","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069176378","display_name":"Zhen-Hua Ling","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5035671169"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.691,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71171887,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9470999836921692,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9470999836921692,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6722820997238159},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.5956168174743652},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5026006698608398},{"id":"https://openalex.org/keywords/exploratory-research","display_name":"Exploratory research","score":0.45598793029785156},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32755887508392334},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08651077747344971},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.054879456758499146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6722820997238159},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.5956168174743652},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5026006698608398},{"id":"https://openalex.org/C85973986","wikidata":"https://www.wikidata.org/wiki/Q1091731","display_name":"Exploratory research","level":2,"score":0.45598793029785156},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32755887508392334},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08651077747344971},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.054879456758499146},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800253","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800253","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1966611275","https://openalex.org/W1995178197","https://openalex.org/W1997191379","https://openalex.org/W2003027589","https://openalex.org/W2022657395","https://openalex.org/W2077837162","https://openalex.org/W2148593056","https://openalex.org/W2402680858","https://openalex.org/W2898677870","https://openalex.org/W3129068537","https://openalex.org/W4225311868","https://openalex.org/W4385245566","https://openalex.org/W4385823185","https://openalex.org/W6631190155","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Nasalance,":[0],"defined":[1],"as":[2,113,155],"the":[3,62,65,83,121,126,138,146,182],"ratio":[4],"of":[5,25,44,134,148,184],"nasal":[6,86],"energy":[7,11],"to":[8,60,95,115,158,201],"total":[9],"acoustic":[10],"during":[12,70],"speech,":[13],"is":[14],"an":[15,52],"important":[16],"metric":[17],"in":[18,175,188],"speech":[19,48,71,77,99,176,206],"science":[20],"and":[21,67,85,88,109,140,171,192,204],"clinical":[22,190],"phonetics.":[23],"Measurement":[24],"nasalance,":[26],"how-ever,":[27],"requires":[28],"specialized":[29],"equipment,":[30],"which":[31],"has":[32],"severely":[33],"limited":[34],"its":[35],"widespread":[36],"applications.":[37],"In":[38],"this":[39,179],"study,":[40],"we":[41],"explored":[42],"methods":[43,187],"predicting":[45],"nasalance":[46,97,160],"from":[47,64,82,98,162],"waveforms.":[49,100],"We":[50,101],"designed":[51],"oral-nasal":[53],"separation":[54],"mask":[55],"with":[56],"thermal":[57],"flow":[58],"sensors":[59],"record":[61],"airflows":[63],"mouth":[66],"nose":[68],"separately":[69],"production,":[72],"alongside":[73],"a":[74],"microphone":[75],"recording":[76],"sounds.":[78],"Nasalance":[79],"was":[80],"calculated":[81],"oral":[84],"airflows,":[87],"multilayer":[89],"perceptron":[90],"(MLP)":[91],"models":[92,153],"were":[93],"trained":[94],"predict":[96,159],"compared":[102],"Mel-spectrogram,":[103],"Mel":[104],"Frequency":[105],"Cepstral":[106],"Coefficients":[107],"(MFCC),":[108],"Wav2vec":[110,122,156],"2.0":[111,157],"features":[112,124],"inputs":[114],"MLPs.":[116],"The":[117],"results":[118],"demonstrated":[119],"that":[120],"2.0-based":[123],"have":[125],"highest":[127],"Pearson":[128],"Product":[129],"Moment":[130],"Correlation":[131],"Coefficient":[132],"(PPMC)":[133],"0.7459,":[135],"outperforming":[136],"both":[137],"Mel-spectrogram":[139],"MFCC":[141],"baselines.":[142],"These":[143],"findings":[144],"emphasize":[145],"potential":[147],"leveraging":[149],"pre-trained":[150],"deep":[151,185],"learning":[152,186],"such":[154],"directly":[161],"raw":[163],"audio":[164],"data,":[165],"reducing":[166],"reliance":[167],"on":[168],"expensive":[169],"instruments":[170],"improving":[172],"diagnostic":[173],"capabilities":[174],"pathology.":[177],"Moreover,":[178],"paper":[180],"under-scores":[181],"promise":[183],"advancing":[189],"assessment":[191],"opens":[193],"up":[194],"new":[195],"avenues":[196],"for":[197],"applying":[198],"compu-tational":[199],"techniques":[200],"better":[202],"understand":[203],"treat":[205],"disorders.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
