{"id":"https://openalex.org/W3201403628","doi":"https://doi.org/10.1109/taslp.2021.3111568","title":"Native-Nonnative Voice Conversion by Residual Warping in a Sparse, Anchor-Based Representation","display_name":"Native-Nonnative Voice Conversion by Residual Warping in a Sparse, Anchor-Based Representation","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3201403628","doi":"https://doi.org/10.1109/taslp.2021.3111568","mag":"3201403628"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3111568","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3111568","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077682246","display_name":"Christopher Liberatore","orcid":"https://orcid.org/0000-0002-5871-0596"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Christopher Liberatore","raw_affiliation_strings":["Computer Science and Engineering, Texas A&M University College Station, 14736 College Station, Texas, United States, (e-mail: cliberatore@tamu.edu)","[Texas A&M University, College station, TX, USA]"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, Texas A&M University College Station, 14736 College Station, Texas, United States, (e-mail: cliberatore@tamu.edu)","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"[Texas A&M University, College station, TX, USA]","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5077682246"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69024415,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"29","issue":null,"first_page":"3040","last_page":"3051"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sabr-volatility-model","display_name":"SABR volatility model","score":0.7033672332763672},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6950708627700806},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6860829591751099},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6786876916885376},{"id":"https://openalex.org/keywords/image-warping","display_name":"Image warping","score":0.6182829141616821},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4899515211582184},{"id":"https://openalex.org/keywords/formant","display_name":"Formant","score":0.4363980293273926},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4293700158596039},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.4277094900608063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3457280397415161},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18741631507873535},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17189297080039978},{"id":"https://openalex.org/keywords/vowel","display_name":"Vowel","score":0.11043542623519897}],"concepts":[{"id":"https://openalex.org/C187625094","wikidata":"https://www.wikidata.org/wiki/Q7388452","display_name":"SABR volatility model","level":4,"score":0.7033672332763672},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6950708627700806},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6860829591751099},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6786876916885376},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.6182829141616821},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4899515211582184},{"id":"https://openalex.org/C158215666","wikidata":"https://www.wikidata.org/wiki/Q1414685","display_name":"Formant","level":3,"score":0.4363980293273926},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4293700158596039},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.4277094900608063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3457280397415161},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18741631507873535},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17189297080039978},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.11043542623519897},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C85393063","wikidata":"https://www.wikidata.org/wiki/Q596307","display_name":"Stochastic volatility","level":3,"score":0.0},{"id":"https://openalex.org/C91602232","wikidata":"https://www.wikidata.org/wiki/Q756115","display_name":"Volatility (finance)","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3111568","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3111568","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7799999713897705}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W93060608","https://openalex.org/W95152782","https://openalex.org/W1513358034","https://openalex.org/W1576602419","https://openalex.org/W1963778986","https://openalex.org/W1965912016","https://openalex.org/W1974745215","https://openalex.org/W1977362459","https://openalex.org/W1991682319","https://openalex.org/W2004299580","https://openalex.org/W2007023536","https://openalex.org/W2017742648","https://openalex.org/W2020304287","https://openalex.org/W2022241205","https://openalex.org/W2023407680","https://openalex.org/W2063978378","https://openalex.org/W2105698384","https://openalex.org/W2112447569","https://openalex.org/W2113717645","https://openalex.org/W2120605154","https://openalex.org/W2121387787","https://openalex.org/W2124331435","https://openalex.org/W2125047278","https://openalex.org/W2135046866","https://openalex.org/W2139246688","https://openalex.org/W2143076222","https://openalex.org/W2147000783","https://openalex.org/W2152205330","https://openalex.org/W2156477760","https://openalex.org/W2156885227","https://openalex.org/W2165674530","https://openalex.org/W2169652224","https://openalex.org/W2169878657","https://openalex.org/W2171220918","https://openalex.org/W2293412408","https://openalex.org/W2343023855","https://openalex.org/W2400588950","https://openalex.org/W2484196375","https://openalex.org/W2509002297","https://openalex.org/W2518406046","https://openalex.org/W2519091744","https://openalex.org/W2577042574","https://openalex.org/W2620947159","https://openalex.org/W2675443237","https://openalex.org/W2733416080","https://openalex.org/W2749651610","https://openalex.org/W2785608393","https://openalex.org/W2786868129","https://openalex.org/W2795109282","https://openalex.org/W2807879808","https://openalex.org/W2888954148","https://openalex.org/W2888982879","https://openalex.org/W2889064624","https://openalex.org/W2890402938","https://openalex.org/W2890843704","https://openalex.org/W2949382160","https://openalex.org/W2962760690","https://openalex.org/W2963927338","https://openalex.org/W2983786745","https://openalex.org/W3097001834","https://openalex.org/W3163600291","https://openalex.org/W4295731579","https://openalex.org/W6603807496","https://openalex.org/W6603838645","https://openalex.org/W6676727762","https://openalex.org/W6696913934","https://openalex.org/W6713218866","https://openalex.org/W6732251480"],"related_works":["https://openalex.org/W2046217390","https://openalex.org/W4365503437","https://openalex.org/W2362995206","https://openalex.org/W2004825552","https://openalex.org/W1986672430","https://openalex.org/W37481168","https://openalex.org/W1990629684","https://openalex.org/W19748186","https://openalex.org/W2611973037","https://openalex.org/W2131825398"],"abstract_inverted_index":{"Voice":[0],"conversion":[1,166],"(VC)":[2],"techniques":[3],"can":[4],"be":[5,118],"used":[6],"to":[7,14,29,86,111,117,120,147,176,192,210],"synthesize":[8],"utterances":[9],"from":[10,138],"second":[11],"language":[12],"learners":[13,24],"appear":[15],"as":[16,59],"if":[17],"they":[18],"have":[19],"a":[20,39,60,104,182],"native":[21],"accent,":[22],"providing":[23],"with":[25],"an":[26,57],"ideal":[27],"target":[28,124],"imitate":[30],"in":[31,82,129,170],"pronunciation":[32],"training.":[33],"In":[34,96],"prior":[35],"work,":[36],"we":[37,99],"presented":[38],"low-resource":[40],"technique":[41],"called":[42],"SABR":[43,67],"(Sparse,":[44],"Anchor-Based":[45],"Representation":[46],"of":[47,64,79,107,122],"Speech),":[48],"which":[49,102],"uses":[50,103],"acoustic":[51,77],"anchorsone":[52],"per":[53],"English":[54],"phonemeto":[55],"represent":[56],"utterance":[58],"sparse,":[61],"linear":[62,105],"combination":[63,106],"nonnegative":[65],"weights.":[66],"produces":[68],"intelligible":[69],"speech,":[70],"but":[71],"its":[72],"compact":[73,94],"size":[74],"limits":[75],"the":[76,80,87,93,113,123,133,139,159,177,198,201,207],"quality":[78,186],"synthesis,":[81],"large":[83],"part":[84],"due":[85],"significant":[88],"residual":[89,115,202,208],"left":[90],"out":[91],"by":[92],"model.":[95],"this":[97],"article,":[98],"propose":[100],"SABR+Res,":[101],"frequency":[108],"warp":[109],"transforms":[110],"convert":[112],"source":[114],"spectrum":[116],"closer":[119],"that":[121,156],"speaker":[125,194],"and":[126,141,144,150,167,205],"use":[127],"it":[128],"synthesis.":[130],"We":[131,154,196],"evaluate":[132],"proposed":[134],"method":[135],"on":[136,187],"speakers":[137],"ARCTIC":[140],"L2-ARCTIC":[142],"databases":[143],"compare":[145],"them":[146],"state-of-the-art":[148],"exemplar":[149],"frequency-warping":[151],"VC":[152,162],"methods.":[153],"find":[155],"SABR+Res":[157,180],"had":[158,181],"lowest":[160],"objective":[161],"error":[163],"for":[164,200],"native-to-nonnative":[165,188],"was":[168],"preferred":[169],"subjective":[171],"tests.":[172],"Additionally,":[173],"when":[174],"compared":[175],"baseline":[178],"systems,":[179],"much":[183],"higher":[184],"synthesis":[185],"speakers,":[189],"performing":[190],"similarly":[191],"native-to-native":[193],"pairs.":[195],"discuss":[197],"implications":[199],"warping":[203],"system":[204],"applying":[206],"transform":[209],"other":[211],"exemplar-based":[212],"systems.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
