{"id":"https://openalex.org/W3173432002","doi":"https://doi.org/10.1587/transinf.2020edp7252","title":"Real-Time Full-Band Voice Conversion with Sub-Band Modeling and Data-Driven Phase Estimation of Spectral Differentials","display_name":"Real-Time Full-Band Voice Conversion with Sub-Band Modeling and Data-Driven Phase Estimation of Spectral Differentials","publication_year":2021,"publication_date":"2021-06-30","ids":{"openalex":"https://openalex.org/W3173432002","doi":"https://doi.org/10.1587/transinf.2020edp7252","mag":"3173432002"},"language":"en","primary_location":{"id":"doi:10.1587/transinf.2020edp7252","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2020edp7252","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E104.D/7/E104.D_2020EDP7252/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.jstage.jst.go.jp/article/transinf/E104.D/7/E104.D_2020EDP7252/_pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025983709","display_name":"Takaaki Saeki","orcid":"https://orcid.org/0000-0001-6003-768X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takaaki SAEKI","raw_affiliation_strings":["University of Tokyo"],"affiliations":[{"raw_affiliation_string":"University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083394213","display_name":"Yuki Saito","orcid":"https://orcid.org/0000-0002-7967-2613"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki SAITO","raw_affiliation_strings":["University of Tokyo"],"affiliations":[{"raw_affiliation_string":"University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke TAKAMICHI","raw_affiliation_strings":["University of Tokyo"],"affiliations":[{"raw_affiliation_string":"University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi SARUWATARI","raw_affiliation_strings":["University of Tokyo"],"affiliations":[{"raw_affiliation_string":"University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5025983709"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.4571,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60351589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"E104.D","issue":"7","first_page":"1002","last_page":"1016"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7418212294578552},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.541329026222229},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4534951150417328},{"id":"https://openalex.org/keywords/hilbert-transform","display_name":"Hilbert transform","score":0.44282495975494385},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3404378592967987},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.1274910867214203}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7418212294578552},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.541329026222229},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4534951150417328},{"id":"https://openalex.org/C28799612","wikidata":"https://www.wikidata.org/wiki/Q685437","display_name":"Hilbert transform","level":3,"score":0.44282495975494385},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3404378592967987},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.1274910867214203}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1587/transinf.2020edp7252","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2020edp7252","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E104.D/7/E104.D_2020EDP7252/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1587/transinf.2020edp7252","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2020edp7252","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E104.D/7/E104.D_2020EDP7252/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3173432002.pdf","grobid_xml":"https://content.openalex.org/works/W3173432002.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W1509691205","https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W2013996527","https://openalex.org/W2049686551","https://openalex.org/W2052871313","https://openalex.org/W2063678701","https://openalex.org/W2093450784","https://openalex.org/W2118850452","https://openalex.org/W2120605154","https://openalex.org/W2126143605","https://openalex.org/W2145892079","https://openalex.org/W2156142001","https://openalex.org/W2170269172","https://openalex.org/W2406654659","https://openalex.org/W2471520273","https://openalex.org/W2567070169","https://openalex.org/W2747744257","https://openalex.org/W2749651610","https://openalex.org/W2785516183","https://openalex.org/W2794725088","https://openalex.org/W2888908158","https://openalex.org/W2902070858","https://openalex.org/W2922104641","https://openalex.org/W2937579788","https://openalex.org/W2949676527","https://openalex.org/W2962760690","https://openalex.org/W2963091184","https://openalex.org/W2963175743","https://openalex.org/W2963808252","https://openalex.org/W2963970792","https://openalex.org/W2963971656","https://openalex.org/W2973216307","https://openalex.org/W2989528444","https://openalex.org/W3015434413","https://openalex.org/W3016120005","https://openalex.org/W3034420534","https://openalex.org/W3081800019","https://openalex.org/W3094923859","https://openalex.org/W3196966628"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W2350741829"],"abstract_inverted_index":{"This":[0,46],"paper":[1],"proposes":[2],"two":[3],"high-fidelity":[4],"and":[5,88,162,176,229,249],"computationally":[6],"efficient":[7],"neural":[8],"voice":[9],"conversion":[10,33],"(VC)":[11],"methods":[12,186],"based":[13,136,200],"on":[14,137,201],"a":[15,28,54,64,103,108,117,132,270],"direct":[16],"waveform":[17],"modification":[18],"using":[19,53],"spectral":[20],"differentials.":[21],"The":[22,205],"conventional":[23],"spectral-differential":[24],"VC":[25,38,194,216,238,255],"method":[26,72,110,135,213,235],"with":[27,160,269],"minimum-phase":[29],"filter":[30,127],"achieves":[31],"high-quality":[32],"for":[34,111,145,214,236],"narrow-band":[35,215],"(16":[36],"kHz-sampled)":[37,76],"but":[39],"requires":[40],"heavy":[41,82],"computational":[42,79,151,247],"cost":[43,80],"in":[44,63,97,262],"filtering.":[45],"is":[47,81,199],"because":[48],"the":[49,58,71,78,89,98,120,150,169,181,184,190,202,210,219,226,231,241,246,266],"minimum":[50],"phase":[51,113],"obtained":[52],"fixed":[55],"lifter":[56,118],"of":[57,119,157,183,189,273,278],"Hilbert":[59,121],"transform":[60,122],"often":[61],"results":[62,206],"long-tap":[65],"filter.":[66],"Furthermore,":[67],"when":[68],"we":[69,106,196],"extend":[70],"to":[73,84,94,179,222],"full-band":[74,146,193,237,254],"(48":[75],"VC,":[77],"due":[83,93],"increased":[85],"sampling":[86,155],"points,":[87],"converted-speech":[90,164,227,242],"quality":[91,165,243],"degrades":[92],"large":[95],"fluctuations":[96],"high-frequency":[99],"band.":[100,171],"To":[101],"construct":[102],"short-tap":[104],"filter,":[105],"propose":[107,131],"lifter-training":[109,212],"data-driven":[112],"reconstruction":[114],"that":[115,208],"trains":[116],"by":[123,153,166],"taking":[124],"into":[125],"account":[126],"truncation.":[128],"We":[129,172],"also":[130],"frequency-band-wise":[133],"modeling":[134,143,167,234],"sub-band":[138,233],"multi-rate":[139],"signal":[140],"processing":[141],"(sub-band":[142],"method)":[144],"VC.":[147],"It":[148],"enhances":[149],"efficiency":[152],"reducing":[154,245],"points":[156],"signals":[158],"converted":[159,267],"filtering":[161],"improves":[163],"only":[168],"low-frequency":[170],"conducted":[173],"several":[174],"objective":[175],"subjective":[177],"evaluations":[178],"investigate":[180],"effectiveness":[182],"proposed":[185,203,211,232],"through":[187],"implementation":[188],"real-time,":[191,252],"online,":[192,253],"system":[195,256],"developed,":[197],"which":[198],"methods.":[204],"indicate":[207],"1)":[209],"can":[217,239,257],"shorten":[218],"tap":[220],"length":[221],"1/16":[223],"without":[224],"degrading":[225],"quality,":[228],"2)":[230],"improve":[240],"while":[244],"cost,":[248],"3)":[250],"our":[251],"convert":[258],"48":[259],"kHz-sampled":[260],"speech":[261,268],"real":[263],"time":[264],"attaining":[265],"3.6":[271],"out":[272],"5.0":[274],"mean":[275],"opinion":[276],"score":[277],"naturalness.":[279]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
