{"id":"https://openalex.org/W3016243847","doi":"https://doi.org/10.1109/icassp40776.2020.9054582","title":"Singing Voice Conversion with Disentangled Representations of Singer and Vocal Technique Using Variational Autoencoders","display_name":"Singing Voice Conversion with Disentangled Representations of Singer and Vocal Technique Using Variational Autoencoders","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3016243847","doi":"https://doi.org/10.1109/icassp40776.2020.9054582","mag":"3016243847"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029344951","display_name":"Yin-Jyun Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yin-Jyun Luo","raw_affiliation_strings":["Singapore University of Technology and Design"],"affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028115568","display_name":"Chin-Cheng Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin-Cheng Hsu","raw_affiliation_strings":["University of Southern California,Los Angeles,United States","University of Southern California, Los Angeles, United States"],"affiliations":[{"raw_affiliation_string":"University of Southern California,Los Angeles,United States","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"University of Southern California, Los Angeles, United States","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078786239","display_name":"Kat Agres","orcid":"https://orcid.org/0000-0001-7260-2447"},"institutions":[{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kat Agres","raw_affiliation_strings":["A*STAR,Institute of High Performance Computing,Singapore","Institute of High Performance Computing, A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"A*STAR,Institute of High Performance Computing,Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute of High Performance Computing, A*STAR, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069548004","display_name":"Dorien Herremans","orcid":"https://orcid.org/0000-0001-8607-1640"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dorien Herremans","raw_affiliation_strings":["Singapore University of Technology and Design"],"affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design","institution_ids":["https://openalex.org/I152815399"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5029344951"],"corresponding_institution_ids":["https://openalex.org/I152815399"],"apc_list":null,"apc_paid":null,"fwci":5.0006,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.96306781,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3277","last_page":"3281"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7362545132637024},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7251315116882324},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6910231113433838},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.6844467520713806},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.6382061839103699},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6066797971725464},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5211570262908936},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49694398045539856},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44186729192733765},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.43643391132354736},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3733285665512085},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.09453621506690979}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7362545132637024},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7251315116882324},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6910231113433838},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.6844467520713806},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.6382061839103699},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6066797971725464},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5211570262908936},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49694398045539856},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44186729192733765},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.43643391132354736},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3733285665512085},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.09453621506690979},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1482298176","https://openalex.org/W1509691205","https://openalex.org/W1522301498","https://openalex.org/W1533861849","https://openalex.org/W1554187497","https://openalex.org/W1590808459","https://openalex.org/W1731081199","https://openalex.org/W1959608418","https://openalex.org/W2201092681","https://openalex.org/W2294038178","https://openalex.org/W2532494225","https://openalex.org/W2556467266","https://openalex.org/W2730106296","https://openalex.org/W2903032817","https://openalex.org/W2921857201","https://openalex.org/W2963539064","https://openalex.org/W2963568578","https://openalex.org/W2964121744","https://openalex.org/W2972812066","https://openalex.org/W2989955315","https://openalex.org/W3012498027","https://openalex.org/W3102751229","https://openalex.org/W4289383906","https://openalex.org/W6631190155","https://openalex.org/W6631943919","https://openalex.org/W6637618735","https://openalex.org/W6640963894","https://openalex.org/W6687630728","https://openalex.org/W6697024540","https://openalex.org/W6755300632","https://openalex.org/W6757202746","https://openalex.org/W6765147365"],"related_works":["https://openalex.org/W2669956259","https://openalex.org/W2138997758","https://openalex.org/W4287995534","https://openalex.org/W2998168123","https://openalex.org/W3107519190","https://openalex.org/W2939353110","https://openalex.org/W4379653318","https://openalex.org/W2897995864","https://openalex.org/W4327774331","https://openalex.org/W2292254049"],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2,51,70,76,125],"flexible":[3],"framework":[4],"that":[5,83],"deals":[6],"with":[7,50],"both":[8],"singer":[9,44,90,118],"conversion":[10,98,116],"and":[11,27,46,92,95,120],"singers":[12],"vocal":[13,47,93,121],"technique":[14,48,94,122],"conversion.":[15],"The":[16],"proposed":[17],"model":[18,85],"is":[19,57,86,109],"trained":[20],"on":[21,124],"non-parallel":[22],"corpora,":[23],"accommodates":[24],"many-to-many":[25],"conversion,":[26],"leverages":[28],"recent":[29],"advances":[30],"of":[31,43,78,99,105,117],"variational":[32],"autoencoders.":[33],"It":[34],"employs":[35],"separate":[36],"encoders":[37],"to":[38,88,113],"learn":[39],"disentangled":[40],"latent":[41,67],"representations":[42],"identity":[45,91,119],"separately,":[49],"joint":[52],"decoder":[53],"for":[54],"reconstruction.":[55],"Conversion":[56],"carried":[58],"out":[59],"by":[60],"simple":[61],"vector":[62],"arithmetic":[63],"in":[64],"the":[65,79,103,110],"learned":[66],"spaces.":[68],"Both":[69],"quantitative":[71],"analysis":[72],"as":[73,75],"well":[74],"visualization":[77],"converted":[80],"spectrograms":[81],"show":[82],"our":[84,106],"able":[87],"disentangle":[89],"successfully":[96],"perform":[97],"these":[100],"attributes.":[101],"To":[102],"best":[104],"knowledge,":[107],"this":[108],"first":[111],"work":[112],"jointly":[114],"tackle":[115],"based":[123],"deep":[126],"learning":[127],"approach.":[128]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
