{"id":"https://openalex.org/W3170697532","doi":"https://doi.org/10.1109/mlsp52302.2021.9596184","title":"MLP Singer: Towards Rapid Parallel Korean Singing Voice Synthesis","display_name":"MLP Singer: Towards Rapid Parallel Korean Singing Voice Synthesis","publication_year":2021,"publication_date":"2021-10-25","ids":{"openalex":"https://openalex.org/W3170697532","doi":"https://doi.org/10.1109/mlsp52302.2021.9596184","mag":"3170697532"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp52302.2021.9596184","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp52302.2021.9596184","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 31st International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.07886","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008425126","display_name":"Jaesung Tae","orcid":null},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jaesung Tae","raw_affiliation_strings":["Yale University,New Haven,CT,USA","[Yale Univ., New Haven, CT, USA]"],"affiliations":[{"raw_affiliation_string":"Yale University,New Haven,CT,USA","institution_ids":["https://openalex.org/I32971472"]},{"raw_affiliation_string":"[Yale Univ., New Haven, CT, USA]","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047164073","display_name":"Hyeongju 
Kim","orcid":"https://orcid.org/0000-0001-8668-0323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyeongju Kim","raw_affiliation_strings":["Neosapience, Inc.,Seoul,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Neosapience, Inc.,Seoul,Republic of Korea","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101974552","display_name":"Younggun Lee","orcid":"https://orcid.org/0000-0002-0596-9318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Younggun Lee","raw_affiliation_strings":["Neosapience, Inc.,Seoul,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Neosapience, Inc.,Seoul,Republic of Korea","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5008425126"],"corresponding_institution_ids":["https://openalex.org/I32971472"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.53856281,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial 
Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8237502574920654},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7444272041320801},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6765962839126587},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6511259078979492},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.571204423904419},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.47333580255508423},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4601254463195801},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.4387891888618469},{"id":"https://openalex.org/keywords/factor","display_name":"Factor (programming 
language)","score":0.424893856048584},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41856229305267334},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32143524289131165},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.0854417085647583},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.07828882336616516}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8237502574920654},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7444272041320801},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6765962839126587},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6511259078979492},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.571204423904419},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.47333580255508423},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4601254463195801},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.4387891888618469},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming 
language)","level":2,"score":0.424893856048584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41856229305267334},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32143524289131165},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0854417085647583},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.07828882336616516},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/mlsp52302.2021.9596184","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp52302.2021.9596184","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 31st International Workshop on Machine Learning for Signal Processing 
(MLSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.07886","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.07886","pdf_url":"https://arxiv.org/pdf/2106.07886","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3170697532","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2106.07886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.07886","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.07886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"},{"id":"doi:10.17023/t5tb-gc57","is_oa":true,"landing_page_url":"https://doi.org/10.17023/t5tb-gc57","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.07886","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.07886","pdf_url":"https://arxiv.org/pdf/2106.07886","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Quality 
Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W29794711","https://openalex.org/W116500682","https://openalex.org/W2462831000","https://openalex.org/W2471520273","https://openalex.org/W2778460379","https://openalex.org/W2899663614","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2964121744","https://openalex.org/W2973046048","https://openalex.org/W2984106626","https://openalex.org/W2985308740","https://openalex.org/W2994673210","https://openalex.org/W3015499232","https://openalex.org/W3015516707","https://openalex.org/W3037932933","https://openalex.org/W3098403858","https://openalex.org/W3112624375","https://openalex.org/W3157506437","https://openalex.org/W3211347078","https://openalex.org/W6604793353","https://openalex.org/W6631190155","https://openalex.org/W6755207826","https://openalex.org/W6771626834","https://openalex.org/W6783867762"],"related_works":["https://openalex.org/W3212754542","https://openalex.org/W3047855478","https://openalex.org/W3206191467","https://openalex.org/W3016007107","https://openalex.org/W3011892665","https://openalex.org/W3190244907","https://openalex.org/W3041080908","https://openalex.org/W3163031268","https://openalex.org/W3010665651","https://openalex.org/W2920158442","https://openalex.org/W3205154814","https://openalex.org/W3081753361","https://openalex.org/W3196667132","https://openalex.org/W3197501823","https://openalex.org/W1183242367","https://openalex.org/W3081910227","https://openalex.org/W2785765285","https://openalex.org/W3123032392","https://openalex.org/W2562274522","https://openalex.org/W2374911258"],"abstract_inverted_index":{"Recent":[0],"developments":[1],"in":[2,39,91],"deep":[3],"learning":[4],"have":[5],"significantly":[6],"improved":[7],"the":[8,40,59,66],"quality":[9,95],"of":[10,61,93,107,120],"synthesized"
:[11],"singing":[12,18,54],"voice":[13,19,55,76],"audio.":[14],"However,":[15],"prominent":[16],"neural":[17],"synthesis":[20,56,97],"systems":[21],"suffer":[22],"from":[23],"slow":[24],"inference":[25],"speed":[26],"due":[27],"to":[28,109],"their":[29],"autoregressive":[30,87],"design.":[31],"Inspired":[32],"by":[33],"MLP-Mixer,":[34],"a":[35,51,85,104],"novel":[36],"architecture":[37,74],"introduced":[38],"vision":[41],"literature":[42],"for":[43,75],"attention-free":[44],"image":[45],"classification,":[46],"we":[47],"propose":[48],"MLP":[49,82,101],"Singer,":[50],"parallel":[52],"Korean":[53],"system.":[57],"To":[58],"best":[60],"our":[62],"knowledge,":[63],"this":[64],"is":[65],"first":[67],"work":[68],"that":[69,81],"uses":[70],"an":[71],"entirely":[72],"MLP-based":[73],"synthesis.":[77],"Listening":[78],"tests":[79],"demonstrate":[80],"Singer":[83,102],"outperforms":[84],"larger":[86],"GAN-based":[88],"system,":[89],"both":[90,125],"terms":[92],"audio":[94],"and":[96,111,115],"speed.":[98],"In":[99],"particular,":[100],"achieves":[103],"real-time":[105],"factor":[106],"up":[108],"200":[110],"3400":[112],"on":[113,124],"CPUs":[114],"GPUs":[116],"respectively,":[117],"enabling":[118],"order":[119],"magnitude":[121],"faster":[122],"generation":[123],"environments.":[126],"<sup":[127,130],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[128,131],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[129,132],"Source":[133],"code":[134],"available":[135],"at":[136],"https://github.com/neosapience/mlp-singer.":[137]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
