{"id":"https://openalex.org/W2021986246","doi":"https://doi.org/10.1109/iscslp.2014.6936599","title":"Pitch transformation in neural network based voice conversion","display_name":"Pitch transformation in neural network based voice conversion","publication_year":2014,"publication_date":"2014-09-01","ids":{"openalex":"https://openalex.org/W2021986246","doi":"https://doi.org/10.1109/iscslp.2014.6936599","mag":"2021986246"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2014.6936599","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2014.6936599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 9th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101692260","display_name":"Fenglong Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Feng-Long Xie","raw_affiliation_strings":["Harbin Institute of Technology, Harbin","Microsoft Research Asia, Beijing"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Microsoft Research Asia, Beijing","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100342006","display_name":"Yao Qian","orcid":"https://orcid.org/0000-0003-1855-9630"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Qian","raw_affiliation_strings":["Microsoft Research Asia, Beijing"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065394791","display_name":"Frank K. Soong","orcid":"https://orcid.org/0000-0002-9088-3577"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Frank K. Soong","raw_affiliation_strings":["Microsoft Research Asia, Beijing"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100398343","display_name":"Haifeng Li","orcid":"https://orcid.org/0000-0002-2534-2299"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Li","raw_affiliation_strings":["Harbin Institute of Technology, Harbin"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101692260"],"corresponding_institution_ids":["https://openalex.org/I204983213","https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":2.4541,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.90590808,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"4","issue":null,"first_page":"197","last_page":"200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6591900587081909},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6495056748390198},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.559973955154419},{"id":"https://openalex.org/keywords/pitch-contour","display_name":"Pitch contour","score":0.5297775268554688},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5141337513923645},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5102665424346924},{"id":"https://openalex.org/keywords/pitch-detection-algorithm","display_name":"Pitch detection algorithm","score":0.4593122601509094},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.44421297311782837},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.3231278955936432},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2784891724586487},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.24977880716323853},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11056739091873169},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06909367442131042}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6591900587081909},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6495056748390198},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.559973955154419},{"id":"https://openalex.org/C2777895490","wikidata":"https://www.wikidata.org/wiki/Q7198848","display_name":"Pitch contour","level":2,"score":0.5297775268554688},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5141337513923645},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5102665424346924},{"id":"https://openalex.org/C135622632","wikidata":"https://www.wikidata.org/wiki/Q7198851","display_name":"Pitch detection algorithm","level":3,"score":0.4593122601509094},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.44421297311782837},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3231278955936432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2784891724586487},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.24977880716323853},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11056739091873169},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06909367442131042},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2014.6936599","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2014.6936599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 9th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W59470279","https://openalex.org/W1498096035","https://openalex.org/W1567666748","https://openalex.org/W1571413787","https://openalex.org/W1776973135","https://openalex.org/W1926603263","https://openalex.org/W1963778986","https://openalex.org/W2103144802","https://openalex.org/W2105698384","https://openalex.org/W2114543868","https://openalex.org/W2118850452","https://openalex.org/W2120605154","https://openalex.org/W2123771434","https://openalex.org/W2131220060","https://openalex.org/W2156142001","https://openalex.org/W2157412983","https://openalex.org/W2168531172","https://openalex.org/W2227610995","https://openalex.org/W2294246205","https://openalex.org/W2295092504","https://openalex.org/W2341413911","https://openalex.org/W2401086277","https://openalex.org/W2401544731","https://openalex.org/W2600829178","https://openalex.org/W3177989406","https://openalex.org/W4285719527","https://openalex.org/W6634121586","https://openalex.org/W6689272646","https://openalex.org/W6697270646","https://openalex.org/W6704332025","https://openalex.org/W6713064456","https://openalex.org/W6736010183","https://openalex.org/W6798679566"],"related_works":["https://openalex.org/W2385578626","https://openalex.org/W1948499687","https://openalex.org/W2162295204","https://openalex.org/W2785978752","https://openalex.org/W2138198763","https://openalex.org/W3203541816","https://openalex.org/W2084562624","https://openalex.org/W1533931297","https://openalex.org/W1903771033","https://openalex.org/W4389356655"],"abstract_inverted_index":{"In":[0,63],"voice":[1,129],"conversion":[2,5,8,24,73,94],"task,":[3],"prosody":[4,55],"especially":[6],"pitch":[7,23,37,42,72,93,107,110],"is":[9,25],"a":[10,68],"very":[11],"challenging":[12],"research":[13],"topic":[14],"because":[15],"of":[16,20,34,48,52,103,128],"the":[17,30,35,40,49,53,59,98,113,126],"discontinuity":[18],"property":[19],"pitch.":[21],"Conventionally":[22],"always":[26],"achieved":[27],"by":[28,84],"adjusting":[29],"mean":[31],"and":[32,56,78,101,108],"variance":[33],"source":[36],"distribution":[38],"to":[39],"target":[41,109],"distribution.":[43],"This":[44],"method":[45],"removes":[46],"most":[47],"detailed":[50],"information":[51],"speaker's":[54],"only":[57],"maintains":[58],"global":[60],"F0":[61,77,104,122],"contour.":[62],"this":[64],"paper,":[65],"we":[66],"propose":[67],"neural":[69,90],"network":[70,91],"based":[71,92],"system":[74],"which":[75],"converts":[76],"spectral":[79],"features":[80],"all":[81],"together":[82],"frame":[83],"frame.":[85],"Experimental":[86],"results":[87],"show":[88],"that":[89],"can":[95,123],"significantly":[96],"reduce":[97],"Unvoiced/Voiced":[99],"error":[100],"RMSE":[102],"between":[105],"converted":[106],"compared":[111],"with":[112],"conventional":[114],"Gaussian":[115],"normalized":[116],"transformation":[117],"method.":[118],"Wavelet":[119],"decomposition":[120],"for":[121],"further":[124],"improve":[125],"performance":[127],"conversion.":[130]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
