{"id":"https://openalex.org/W2785765285","doi":"https://doi.org/10.1109/apsipa.2017.8282216","title":"Voice conversion based on deep neural networks for time-variant linear transformations","display_name":"Voice conversion based on deep neural networks for time-variant linear transformations","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2785765285","doi":"https://doi.org/10.1109/apsipa.2017.8282216","mag":"2785765285"},"language":"en","primary_location":{"id":"doi:10.1109/apsipa.2017.8282216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2017.8282216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021955530","display_name":"Gaku Kotani","orcid":"https://orcid.org/0000-0003-3152-0239"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Gaku Kotani","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010841595","display_name":"Daisuke Saito","orcid":"https://orcid.org/0000-0003-3200-579X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Daisuke Saito","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041213266","display_name":"Nobuaki Minematsu","orcid":"https://orcid.org/0000-0002-8778-9555"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobuaki Minematsu","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021955530"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.9751,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.82917072,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"4","issue":null,"first_page":"1259","last_page":"1262"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8171504735946655},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7979307174682617},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6709370017051697},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.6226778030395508},{"id":"https://openalex.org/keywords/data-conversion","display_name":"Data conversion","score":0.5774774551391602},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5602733492851257},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.5230268239974976},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5083181262016296},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4696238338947296},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41233164072036743},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.1392384171485901},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.07861381769180298}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8171504735946655},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7979307174682617},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6709370017051697},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.6226778030395508},{"id":"https://openalex.org/C1232282","wikidata":"https://www.wikidata.org/wiki/Q1783551","display_name":"Data conversion","level":2,"score":0.5774774551391602},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5602733492851257},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5230268239974976},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5083181262016296},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4696238338947296},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41233164072036743},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.1392384171485901},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.07861381769180298},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipa.2017.8282216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2017.8282216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2049686551","https://openalex.org/W2118850452","https://openalex.org/W2121715942","https://openalex.org/W2123003832","https://openalex.org/W2126143605","https://openalex.org/W2156142001","https://openalex.org/W2169878657","https://openalex.org/W2194775991","https://openalex.org/W2293049663","https://openalex.org/W2294351487","https://openalex.org/W2509917451","https://openalex.org/W2514429423","https://openalex.org/W6678185079","https://openalex.org/W6696767757"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W1537411440","https://openalex.org/W1984347656","https://openalex.org/W2535215250","https://openalex.org/W2024201202"],"abstract_inverted_index":{"In":[0,20],"voice":[1,44,99],"conversion,":[2],"deep":[3,90],"neural":[4,91],"networks":[5,92],"are":[6,60],"now":[7],"being":[8],"used":[9],"as":[10],"conversion":[11,35,45,106,111],"models":[12,140],"that":[13,64,72,107,120,131],"map":[14],"source":[15],"features":[16,144],"to":[17,31,145],"target":[18],"features.":[19,147],"this":[21],"framework,":[22],"it":[23],"generally":[24],"needs":[25],"a":[26,47,54,78,86,104,115],"larger":[27],"amount":[28,56,80],"of":[29,43,57,69,76,81,98],"data":[30,59],"train":[32],"more":[33],"accurate":[34],"models.":[36],"This":[37,83],"condition,":[38],"however,":[39],"will":[40],"reduce":[41],"usability":[42],"because":[46],"text-to-speech":[48],"synthesizer":[49],"can":[50,94],"be":[51],"built":[52],"when":[53],"large":[55,79],"training":[58],"available.":[61],"We":[62],"argue":[63],"we":[65,73],"should":[66],"take":[67],"advantage":[68],"top-down":[70],"knowledge":[71],"have":[74],"instead":[75],"preparing":[77],"data.":[82],"paper":[84],"proposes":[85],"novel":[87],"architecture":[88],"using":[89],"which":[93],"achieve":[95],"superior":[96],"performance":[97],"conversion.":[100],"Our":[101],"proposal":[102],"is":[103,129],"network-based":[105],"realizes":[108],"only":[109],"linear":[110,132],"but":[112],"in":[113,125],"even":[114],"time-variant":[116],"way.":[117],"Experiments":[118],"show":[119],"naturalness":[121],"improvement":[122],"was":[123],"observed":[124],"subjective":[126],"assessments.":[127],"It":[128],"considered":[130],"constraints":[133],"at":[134],"each":[135],"time":[136],"step":[137],"prevent":[138],"trained":[139],"from":[141],"converting":[142],"input":[143],"unrealistic":[146]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
