{"id":"https://openalex.org/W4372266858","doi":"https://doi.org/10.1109/icassp49357.2023.10094737","title":"Voice-Preserving Zero-Shot Multiple Accent Conversion","display_name":"Voice-Preserving Zero-Shot Multiple Accent Conversion","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372266858","doi":"https://doi.org/10.1109/icassp49357.2023.10094737"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094737","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094737","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023526681","display_name":"Mumin Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mumin Jin","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070013411","display_name":"Prashant Serai","orcid":"https://orcid.org/0000-0002-4672-4413"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prashant Serai","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057239023","display_name":"Jilong Wu","orcid":"https://orcid.org/0009-0007-8000-347X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jilong Wu","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028956985","display_name":"Vimal Manohar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vimal Manohar","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100734672","display_name":"Qing He","orcid":"https://orcid.org/0000-0001-8833-5398"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qing He","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5023526681"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5734,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85962489,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.8669310808181763},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6110764741897583},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6100937128067017},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6062152981758118},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.6057771444320679},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.5211079716682434},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5080170631408691},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.48897111415863037},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4448888301849365},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4395577609539032},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.13258889317512512}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.8669310808181763},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6110764741897583},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6100937128067017},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6062152981758118},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.6057771444320679},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.5211079716682434},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5080170631408691},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.48897111415863037},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4448888301849365},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4395577609539032},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.13258889317512512},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094737","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094737","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W942963634","https://openalex.org/W1991682319","https://openalex.org/W2888954148","https://openalex.org/W2890402938","https://openalex.org/W2955237921","https://openalex.org/W2972359262","https://openalex.org/W2972473628","https://openalex.org/W2973142754","https://openalex.org/W3015430779","https://openalex.org/W3036601975","https://openalex.org/W3092028330","https://openalex.org/W3135654121","https://openalex.org/W3140429000","https://openalex.org/W3141523618","https://openalex.org/W3178546316","https://openalex.org/W3197103763","https://openalex.org/W3204009030","https://openalex.org/W4297841851","https://openalex.org/W6780218876","https://openalex.org/W6783867762"],"related_works":["https://openalex.org/W2406877384","https://openalex.org/W2595839522","https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W3109975354","https://openalex.org/W2038083449","https://openalex.org/W2124576126","https://openalex.org/W2138455584","https://openalex.org/W2562096895","https://openalex.org/W2376590243"],"abstract_inverted_index":{"Most":[0],"people":[1],"who":[2],"have":[3,11],"tried":[4],"to":[5,76,92,115,121,141],"learn":[6],"a":[7,18,28,33,42,60],"foreign":[8],"language":[9,67],"would":[10],"experienced":[12],"difficulties":[13],"understanding":[14,25],"or":[15,26],"speaking":[16,27],"with":[17],"native":[19,23],"speaker\u2019s":[20,43,48,119],"accent.":[21],"For":[22],"speakers,":[24],"new":[29],"accent":[30,37,44,72,82,94,109,144],"is":[31,112],"likewise":[32],"difficult":[34],"task.":[35],"An":[36],"conversion":[38,73,110],"system":[39],"that":[40,47,133,138],"changes":[41],"but":[45],"preserves":[46],"voice":[49,128],"identity,":[50],"such":[51,64],"as":[52,65],"timbre":[53],"and":[54,69,81,145],"pitch,":[55],"has":[56],"the":[57,78,84,113,142,147],"potential":[58],"for":[59],"range":[61],"of":[62],"applications,":[63],"communication,":[66],"learning,":[68],"entertainment.":[70],"Existing":[71],"models":[74,111],"tend":[75],"change":[77],"speaker":[79],"identity":[80],"at":[83],"same":[85],"time.":[86],"Here,":[87],"we":[88],"use":[89],"adversarial":[90],"learning":[91],"disentangle":[93],"dependent":[95],"features":[96],"while":[97,124],"retaining":[98],"other":[99],"acoustic":[100],"characteristics.":[101],"What":[102],"sets":[103],"our":[104,134],"work":[105],"apart":[106],"from":[107],"existing":[108],"capability":[114],"convert":[116],"an":[117],"unseen":[118],"utterance":[120],"multiple":[122],"accents":[123],"preserving":[125],"its":[126],"original":[127,148],"identity.":[129],"Subjective":[130],"evaluations":[131],"show":[132],"model":[135],"generates":[136],"audio":[137],"sound":[139],"closer":[140],"target":[143],"like":[146],"speaker.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
