{"id":"https://openalex.org/W4391305664","doi":"https://doi.org/10.1109/taslp.2024.3359893","title":"Statistically Guided Near-End Speech Intelligibility Improvement Through Voice Transformation and Transfer Learning","display_name":"Statistically Guided Near-End Speech Intelligibility Improvement Through Voice Transformation and Transfer Learning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391305664","doi":"https://doi.org/10.1109/taslp.2024.3359893"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3359893","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3359893","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036738241","display_name":"Ritujoy Biswas","orcid":"https://orcid.org/0000-0003-4108-1217"},"institutions":[{"id":"https://openalex.org/I4210127441","display_name":"Indian Institute of Technology Jammu","ror":"https://ror.org/02f0vsw63","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210127441"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ritujoy Biswas","raw_affiliation_strings":["Indian Institute of Technology Jammu, Jammu, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Jammu, Jammu, India","institution_ids":["https://openalex.org/I4210127441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107339473","display_name":"Karan Nathwani","orcid":"https://orcid.org/0000-0003-2531-0411"},"institutions":[{"id":"https://openalex.org/I4210127441","display_name":"Indian Institute of Technology Jammu","ror":"https://ror.org/02f0vsw63","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210127441"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Karan Nathwani","raw_affiliation_strings":["Indian Institute of Technology Jammu, Jammu, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Jammu, Jammu, India","institution_ids":["https://openalex.org/I4210127441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075520691","display_name":"Vinayak Abrol","orcid":"https://orcid.org/0000-0001-8149-8151"},"institutions":[{"id":"https://openalex.org/I110675161","display_name":"Infosys (India)","ror":"https://ror.org/03bs18y54","country_code":"IN","type":"company","lineage":["https://openalex.org/I110675161"]},{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vinayak Abrol","raw_affiliation_strings":["Infosys Center for AI, Indraprastha Institute of Information Technology Delhi, New Delhi, India"],"affiliations":[{"raw_affiliation_string":"Infosys Center for AI, Indraprastha Institute of Information Technology Delhi, New Delhi, India","institution_ids":["https://openalex.org/I119939252","https://openalex.org/I110675161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5036738241"],"corresponding_institution_ids":["https://openalex.org/I4210127441"],"apc_list":null,"apc_paid":null,"fwci":0.7536,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.64555335,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"32","issue":null,"first_page":"1445","last_page":"1456"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10822","display_name":"Acoustic Wave Phenomena Research","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7546602487564087},{"id":"https://openalex.org/keywords/formant","display_name":"Formant","score":0.7146531343460083},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6766935586929321},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.674958348274231},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.6167724132537842},{"id":"https://openalex.org/keywords/transfer-function","display_name":"Transfer function","score":0.5997655987739563},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5551747679710388},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5297546982765198},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.48778849840164185},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31477272510528564},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.17157229781150818},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07739362120628357}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7546602487564087},{"id":"https://openalex.org/C158215666","wikidata":"https://www.wikidata.org/wiki/Q1414685","display_name":"Formant","level":3,"score":0.7146531343460083},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6766935586929321},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.674958348274231},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.6167724132537842},{"id":"https://openalex.org/C81299745","wikidata":"https://www.wikidata.org/wiki/Q334269","display_name":"Transfer function","level":2,"score":0.5997655987739563},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5551747679710388},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5297546982765198},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.48778849840164185},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31477272510528564},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.17157229781150818},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07739362120628357},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3359893","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3359893","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1881794303","https://openalex.org/W1949271984","https://openalex.org/W1975675577","https://openalex.org/W1991906437","https://openalex.org/W1993201860","https://openalex.org/W2010815565","https://openalex.org/W2015787642","https://openalex.org/W2027812633","https://openalex.org/W2066754815","https://openalex.org/W2067295501","https://openalex.org/W2069964364","https://openalex.org/W2074782436","https://openalex.org/W2096429026","https://openalex.org/W2131613989","https://openalex.org/W2147002892","https://openalex.org/W2149676179","https://openalex.org/W2151484683","https://openalex.org/W2395675680","https://openalex.org/W2404548911","https://openalex.org/W2463322089","https://openalex.org/W2611963065","https://openalex.org/W2620812332","https://openalex.org/W2748191214","https://openalex.org/W2767488834","https://openalex.org/W2785220834","https://openalex.org/W2892306418","https://openalex.org/W2899126307","https://openalex.org/W2932251702","https://openalex.org/W2939302023","https://openalex.org/W2963351212","https://openalex.org/W3014038380","https://openalex.org/W3021137403","https://openalex.org/W3023765725","https://openalex.org/W3056518849","https://openalex.org/W3115145460","https://openalex.org/W3196388857","https://openalex.org/W4220928236","https://openalex.org/W4287831879","https://openalex.org/W4304775904","https://openalex.org/W4379470385","https://openalex.org/W6684352069","https://openalex.org/W6748663107"],"related_works":["https://openalex.org/W2046217390","https://openalex.org/W4365503437","https://openalex.org/W2362995206","https://openalex.org/W2004825552","https://openalex.org/W1986772939","https://openalex.org/W2037635165","https://openalex.org/W2738829087","https://openalex.org/W2542062716","https://openalex.org/W1505346162","https://openalex.org/W4200562864"],"abstract_inverted_index":{"In":[0],"recent":[1],"developments,":[2],"speech":[3,19,94],"intelligibility":[4,199],"has":[5],"been":[6],"improved":[7],"through":[8],"an":[9],"optimal":[10],"trapezoidal":[11],"transformation":[12,85,106,175,190],"function,":[13],"which":[14,154],"performed":[15],"normal":[16,91],"to":[17,32,36,43,55,65,87,92],"Lombard":[18,93],"conversion":[20],"via":[21,171],"formant":[22,45,184],"shifting.":[23],"Despite":[24],"performing":[25,115],"well,":[26],"the":[27,39,58,119,130,141,147,172,180,188],"optimization":[28],"took":[29],"very":[30],"long":[31],"converge":[33],"and":[34,166,192,195,214],"led":[35],"artifacts":[37,110],"in":[38,47,133],"modified":[40],"signal":[41],"due":[42],"aggressive":[44],"shifts":[46,185],"unvoiced":[48],"frames.":[49],"Therefore,":[50],"transfer":[51,74,125,142,161],"learning":[52,126,143],"was":[53,77,155],"used":[54],"rapidly":[56],"modify":[57],"optimized":[59],"parameters":[60,98,163],"for":[61,68],"a":[62,69,83],"target":[63],"language":[64,136],"bypass":[66],"re-optimization":[67],"new":[70,105],"language.":[71],"However,":[72],"such":[73],"across":[75,144,164],"noises":[76,145,167],"left":[78],"unaddressed.":[79],"This":[80,122],"work":[81,123],"proposes":[82],"Gaussian":[84,174,189],"function":[86,107,191],"perform":[88],"statistically":[89],"guided":[90],"conversion.":[95],"Optimizing":[96],"fewer":[97,109],"ensures":[99],"faster":[100],"convergence":[101],"than":[102],"before.":[103],"The":[104,159],"generates":[108],"during":[111],"voice":[112],"modification":[113],"while":[114],"at":[116],"par":[117],"with":[118],"earlier":[120],"function.":[121,176],"enhances":[124],"performance":[127],"by":[128,187],"mitigating":[129],"directional":[131],"nature":[132],"case":[134],"of":[135,150,162,208],"mismatch.":[137],"We":[138,177],"also":[139,178],"propose":[140],"using":[146],"comparative":[148],"estimations":[149],"noise":[151,212],"magnitude":[152],"spectra,":[153],"not":[156],"feasible":[157,170],"earlier.":[158],"simultaneous":[160],"languages":[165],"is":[168],"now":[169],"proposed":[173],"explore":[179],"statistical":[181],"difference":[182],"between":[183],"produced":[186],"its":[193],"predecessor":[194],"their":[196],"effect":[197],"on":[198,205],"improvement.":[200],"All":[201],"experiments":[202],"were":[203],"conducted":[204],"exhaustive":[206],"combinations":[207],"three":[209,215],"languages,":[210],"four":[211],"types,":[213],"SNR":[216],"levels.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
