{"id":"https://openalex.org/W2890843704","doi":"https://doi.org/10.1109/icassp.2018.8462589","title":"Voice Conversion Through Residual Warping in a Sparse, Anchor-Based Representation of Speech","display_name":"Voice Conversion Through Residual Warping in a Sparse, Anchor-Based Representation of Speech","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2890843704","doi":"https://doi.org/10.1109/icassp.2018.8462589","mag":"2890843704"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077682246","display_name":"Christopher Liberatore","orcid":"https://orcid.org/0000-0002-5871-0596"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Liberatore","raw_affiliation_strings":["Texas A&M University, College Station, Texas, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, Texas, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078020198","display_name":"Guanlong Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guanlong Zhao","raw_affiliation_strings":["Texas A&M University, College Station, Texas, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, Texas, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062423099","display_name":"Ricardo Guti\u00e9rrez\u2010Osuna","orcid":"https://orcid.org/0000-0003-2817-2085"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ricardo Gutierrez-Osuna","raw_affiliation_strings":["Texas A&M University, College Station, Texas, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, Texas, USA","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5069,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.74242798,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5284","last_page":"5288"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sabr-volatility-model","display_name":"SABR volatility model","score":0.951876163482666},{"id":"https://openalex.org/keywords/image-warping","display_name":"Image warping","score":0.756514310836792},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.668023407459259},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6240689158439636},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.606939435005188},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5418432950973511},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4214581251144409},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30277127027511597},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1786559522151947},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17714476585388184}],"concepts":[{"id":"https://openalex.org/C187625094","wikidata":"https://www.wikidata.org/wiki/Q7388452","display_name":"SABR volatility model","level":4,"score":0.951876163482666},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.756514310836792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.668023407459259},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6240689158439636},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.606939435005188},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5418432950973511},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4214581251144409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30277127027511597},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1786559522151947},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17714476585388184},{"id":"https://openalex.org/C85393063","wikidata":"https://www.wikidata.org/wiki/Q596307","display_name":"Stochastic volatility","level":3,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C91602232","wikidata":"https://www.wikidata.org/wiki/Q756115","display_name":"Volatility (finance)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2018.8462589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W95152782","https://openalex.org/W1965912016","https://openalex.org/W1977362459","https://openalex.org/W2005438552","https://openalex.org/W2017742648","https://openalex.org/W2063978378","https://openalex.org/W2120605154","https://openalex.org/W2121387787","https://openalex.org/W2135046866","https://openalex.org/W2136166660","https://openalex.org/W2139246688","https://openalex.org/W2140514146","https://openalex.org/W2152205330","https://openalex.org/W2156142001","https://openalex.org/W2156477760","https://openalex.org/W2165674530","https://openalex.org/W2169878657","https://openalex.org/W2292984643","https://openalex.org/W2400588950","https://openalex.org/W2509002297","https://openalex.org/W2577042574","https://openalex.org/W6603838645","https://openalex.org/W6641710322","https://openalex.org/W6679836976","https://openalex.org/W6680156768","https://openalex.org/W6697163159","https://openalex.org/W6713218866","https://openalex.org/W6732251480"],"related_works":["https://openalex.org/W2549182546","https://openalex.org/W1670332068","https://openalex.org/W4283714926","https://openalex.org/W2171123343","https://openalex.org/W2095618524","https://openalex.org/W2802937241","https://openalex.org/W2767930639","https://openalex.org/W4388906216","https://openalex.org/W4307152771","https://openalex.org/W2735770592"],"abstract_inverted_index":{"In":[0,154],"previous":[1],"work":[2],"we":[3,49,64,102],"presented":[4],"a":[5,21,33,39,87,130,147,175],"Sparse,":[6],"Anchor-Based":[7],"Representation":[8],"of":[9,23,53,73,94,106,128,165,203],"speech":[10],"(SABR)":[11],"that":[12,57,188],"uses":[13],"phonemic":[14],"\u201canchors\u201d":[15],"to":[16,81,112,134],"represent":[17],"an":[18,51,122,155],"utterance":[19],"with":[20,36,121],"set":[22],"sparse":[24],"non-negative":[25],"weights.":[26],"SABR":[27,56,71,138,170,204],"is":[28],"speaker-independent:":[29],"combining":[30],"weights":[31],"from":[32,38,69,92],"source":[34,75],"speaker":[35,41,157],"anchors":[37],"target":[40,83],"can":[42,193],"be":[43],"used":[44],"for":[45,137,146],"voice":[46,60],"conversion.":[47],"Here,":[48],"present":[50],"extension":[52],"the":[54,66,70,74,82,104,108,113,163,190,199],"original":[55,114],"significantly":[58],"improves":[59],"conversion":[61],"synthesis.":[62],"Namely,":[63],"take":[65],"residual":[67,110,192],"signal":[68],"decomposition":[72],"speaker's":[76,84],"utterance,":[77],"and":[78,99,143,169],"warp":[79],"it":[80],"space":[85],"using":[86],"weighted":[88],"warping":[89],"function":[90],"learned":[91],"pairs":[93],"source-target":[95],"anchors.":[96],"Using":[97],"subjective":[98],"objective":[100],"evaluations,":[101],"examine":[103],"performance":[105],"adding":[107,189],"warped":[109,191],"(SABR+Res)":[111],"synthesis":[115,196],"(SABR).":[116],"Specifically,":[117],"listeners":[118,160],"rated":[119],"SABR+Res":[120,166],"average":[123],"mean":[124],"opinion":[125],"score":[126],"(MOS)":[127],"3.6,":[129],"significant":[131],"improvement":[132],"compared":[133],"2.2":[135],"MOS":[136,145],"alone":[139],"(p":[140,151,179],"<;":[141,152],"0.01)":[142],"2.5":[144],"baseline":[148],"GMM":[149,176],"method":[150,177],"0.01).":[153],"XAB":[156],"identity":[158,164],"test,":[159],"correctly":[161],"identified":[162],"(81":[167],"%)":[168],"(84%)":[171],"as":[172,174],"frequently":[173],"(82%)":[178],"=":[180,183],"0.70,":[181],"P":[182],"0.35).":[184],"These":[185],"results":[186],"indicate":[187],"dramatically":[194],"improve":[195],"while":[197],"retaining":[198],"desirable":[200],"independent":[201],"qualities":[202],"models.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
