{"id":"https://openalex.org/W4367164141","doi":"https://doi.org/10.1109/access.2023.3270699","title":"Non-Parallel Whisper-to-Normal Speaking Style Conversion Using Auxiliary Classifier Variational Autoencoder","display_name":"Non-Parallel Whisper-to-Normal Speaking Style Conversion Using Auxiliary Classifier Variational Autoencoder","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4367164141","doi":"https://doi.org/10.1109/access.2023.3270699"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3270699","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1109/access.2023.3270699","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10109017.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10109017.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018003761","display_name":"Shogo Seki","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shogo Seki","raw_affiliation_strings":["NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan"],"raw_orcid":"https://orcid.org/0009-0007-3990-3740","affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001243214","display_name":"Hirokazu Kameoka","orcid":"https://orcid.org/0000-0003-3102-0162"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirokazu Kameoka","raw_affiliation_strings":["NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan"],"raw_orcid":"https://orcid.org/0000-0003-3102-0162","affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020693766","display_name":"Takuhiro Kaneko","orcid":"https://orcid.org/0009-0000-8016-5144"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takuhiro Kaneko","raw_affiliation_strings":["NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004248530","display_name":"Kou Tanaka","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kou Tanaka","raw_affiliation_strings":["NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, Atsugi, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Communication Science Laboratories, Nippon Telegraph and Telephone Corporation, 3-1 Morinosato Wakamiya, Atsugi-shi, Kanagawa, Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018003761"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.5055,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69754503,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"11","issue":null,"first_page":"44590","last_page":"44599"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.9065040946006775},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6270006895065308},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5361535549163818},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46176427602767944},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42734211683273315},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41033047437667847},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3383753299713135},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1779114007949829}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.9065040946006775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6270006895065308},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5361535549163818},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46176427602767944},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42734211683273315},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41033047437667847},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3383753299713135},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1779114007949829}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3270699","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1109/access.2023.3270699","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10109017.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b1e162ef0003421a9cc91e7eff195414","is_oa":true,"landing_page_url":"https://doaj.org/article/b1e162ef0003421a9cc91e7eff195414","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 44590-44599 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3270699","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1109/access.2023.3270699","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10109017.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2664191106","display_name":"\u97f3\u30e1\u30c7\u30a3\u30a2\u30b3\u30df\u30e5\u30cb\u30b1\u30fc\u30b7\u30e7\u30f3\u306b\u304a\u3051\u308b\u5171\u5275\u578b\u6a5f\u80fd\u62e1\u5f35\u6280\u8853\u306e\u5275\u51fa","funder_award_id":"JPMJCR19A3","funder_id":"https://openalex.org/F4320334789","funder_display_name":"Japan Science and Technology Agency"}],"funders":[{"id":"https://openalex.org/F4320334789","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4367164141.pdf","grobid_xml":"https://content.openalex.org/works/W4367164141.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W95152782","https://openalex.org/W1552314771","https://openalex.org/W1959608418","https://openalex.org/W1965255698","https://openalex.org/W2022125261","https://openalex.org/W2056852181","https://openalex.org/W2064675550","https://openalex.org/W2108501770","https://openalex.org/W2123003832","https://openalex.org/W2141998673","https://openalex.org/W2142300631","https://openalex.org/W2471520273","https://openalex.org/W2532494225","https://openalex.org/W2603777577","https://openalex.org/W2613904329","https://openalex.org/W2766672686","https://openalex.org/W2804998325","https://openalex.org/W2889061305","https://openalex.org/W2899877258","https://openalex.org/W2902070858","https://openalex.org/W2937579788","https://openalex.org/W2945478979","https://openalex.org/W2946555236","https://openalex.org/W2962793481","https://openalex.org/W2963539064","https://openalex.org/W2963767194","https://openalex.org/W2964058413","https://openalex.org/W2972394484","https://openalex.org/W2972541922","https://openalex.org/W2972544500","https://openalex.org/W2972659941","https://openalex.org/W2972667718","https://openalex.org/W2992384298","https://openalex.org/W3015338123","https://openalex.org/W3034420534","https://openalex.org/W3095936335","https://openalex.org/W3100696337","https://openalex.org/W3102628737","https://openalex.org/W3113687514","https://openalex.org/W3118753411","https://openalex.org/W3162512456","https://openalex.org/W3162879338","https://openalex.org/W3163475957","https://openalex.org/W4320013936","https://openalex.org/W6603838645","https://openalex.org/W6640963894","https://openalex.org/W6675944832","https://openalex.org/W6737778391","https://openalex.org/W6762533536"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W2159052453","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W2803255133","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W4321789545"],"abstract_inverted_index":{"This":[0],"paper":[1],"is":[2,27,76],"concerned":[3],"with":[4],"non-parallel":[5],"whisper-to-normal":[6],"speaking-style":[7,108],"conversion":[8,29,203],"(W2N-SC),":[9],"which":[10,31],"converts":[11,32],"whispered":[12,56,65,98],"speech":[13,16,57,66,74,111,184],"into":[14],"normal":[15,54,73,100],"without":[17],"using":[18,182],"parallel":[19],"training":[20,173],"data.":[21],"Most":[22],"relevant":[23],"to":[24,36,80,99,135,154],"this":[25,155],"task":[26,41,47],"voice":[28,35,95],"(VC),":[30],"one":[33],"speaker\u2019s":[34],"another.":[37],"However,":[38],"the":[39,44,85,107,161,167,199,210,213,216,222],"W2N-SC":[40,200,211],"differs":[42],"from":[43,97],"regular":[45],"VC":[46,121,145,224],"in":[48,84,115,209],"three":[49],"main":[50],"respects.":[51],"First,":[52],"unlike":[53],"speech,":[55,101],"contains":[58],"little":[59],"or":[60,102],"no":[61],"pitch":[62],"information.":[63],"Second,":[64],"usually":[67],"has":[68],"significantly":[69],"less":[70],"energy":[71],"than":[72,221],"and":[75,129,150,186,201],"therefore":[77],"more":[78],"susceptible":[79],"external":[81],"noise.":[82],"Third,":[83],"actual":[86],"usage":[87],"scenario":[88],"of":[89,109,169,198],"W2N-SC,":[90],"users":[91],"may":[92],"suddenly":[93],"switch":[94],"modes":[96],"vice":[103],"versa,":[104],"meaning":[105],"that":[106,190],"input":[110],"cannot":[112],"be":[113,133,158],"assumed":[114],"advance.":[116],"To":[117],"clarify":[118],"whether":[119],"existing":[120],"techniques":[122],"can":[123],"successfully":[124],"handle":[125],"these":[126],"task-specific":[127],"concerns":[128],"how":[130],"they":[131],"should":[132],"modified":[134],"better":[136,220],"address":[137],"them,":[138],"we":[139,165,205],"consider":[140],"a":[141,148,171],"variational":[142],"autoencoder":[143],"(VAE)-based":[144],"method":[146,156],"as":[147,227],"baseline":[149,223],"examine":[151],"what":[152],"modifications":[153,218],"would":[157],"effective":[159],"for":[160,192],"current":[162],"task.":[163],"Specifically,":[164],"study":[166],"effects":[168],"1)":[170],"self-supervised":[172],"scheme":[174],"called":[175],"filling-in-frames":[176],"(FIF);":[177],"2)":[178],"data":[179],"augmentation":[180],"(DA)":[181],"noisy":[183],"samples;":[185],"3)":[187],"an":[188],"architecture":[189],"allows":[191],"any-to-many":[193],"conversions.":[194],"Through":[195],"experimental":[196],"evaluation":[197],"speaker":[202],"tasks,":[204],"confirmed":[206],"that,":[207],"especially":[208],"task,":[212],"version":[214],"incorporating":[215],"above":[217],"works":[219],"model":[225],"applied":[226],"is.":[228]},"counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
