{"id":"https://openalex.org/W3097566756","doi":"https://doi.org/10.1109/icassp39728.2021.9413605","title":"StyleMelGAN: An Efficient High-Fidelity Adversarial Vocoder with Temporal Adaptive Normalization","display_name":"StyleMelGAN: An Efficient High-Fidelity Adversarial Vocoder with Temporal Adaptive Normalization","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3097566756","doi":"https://doi.org/10.1109/icassp39728.2021.9413605","mag":"3097566756"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413605","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.01557","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110975574","display_name":"Ahmed Mustafa","orcid":null},"institutions":[{"id":"https://openalex.org/I4923324","display_name":"Fraunhofer Society","ror":"https://ror.org/05hkkdn48","country_code":"DE","type":"funder","lineage":["https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Ahmed Mustafa","raw_affiliation_strings":["Fraunhofer IIS,Erlangen,Germany","Fraunhofer Society"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS,Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Society","institution_ids":["https://openalex.org/I4923324"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039716073","display_name":"Nicola Pia","orcid":"https://orcid.org/0000-0003-0987-863X"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4923324","display_name":"Fraunhofer Society","ror":"https://ror.org/05hkkdn48","country_code":"DE","type":"funder","lineage":["https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nicola Pia","raw_affiliation_strings":["Fraunhofer IIS,Erlangen,Germany","Fraunhofer Society"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS,Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Society","institution_ids":["https://openalex.org/I4923324"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054985419","display_name":"Guillaume Fuchs","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4923324","display_name":"Fraunhofer Society","ror":"https://ror.org/05hkkdn48","country_code":"DE","type":"funder","lineage":["https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Guillaume Fuchs","raw_affiliation_strings":["Fraunhofer IIS,Erlangen,Germany","Fraunhofer Society"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS,Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Society","institution_ids":["https://openalex.org/I4923324"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5110975574"],"corresponding_institution_ids":["https://openalex.org/I4210124274","https://openalex.org/I4923324"],"apc_list":null,"apc_paid":null,"fwci":0.5599,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71577221,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"6034","last_page":"6038"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7946962118148804},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7719900608062744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.710859477519989},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5408449769020081},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4767873287200928},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4355584979057312},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.4315245449542999},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4269174039363861},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4235476851463318},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3122760057449341}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7946962118148804},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7719900608062744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.710859477519989},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5408449769020081},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4767873287200928},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4355584979057312},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.4315245449542999},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4269174039363861},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4235476851463318},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3122760057449341},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413605","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.01557","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.01557","pdf_url":"https://arxiv.org/pdf/2011.01557","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:fraunhofer.de:N-641427","is_oa":false,"landing_page_url":"http://publica.fraunhofer.de/documents/N-641427.html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400801","display_name":"Publikationsdatenbank der Fraunhofer-Gesellschaft (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fraunhofer IIS","raw_type":"Conference Paper"},{"id":"pmh:oai:publica.fraunhofer.de:publica/413117","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/413117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"},{"id":"doi:10.48550/arxiv.2011.01557","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.01557","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/ysa9-2k19","is_oa":true,"landing_page_url":"https://doi.org/10.17023/ysa9-2k19","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3097566756","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.01557","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.01557","pdf_url":"https://arxiv.org/pdf/2011.01557","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6600000262260437,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3097566756.pdf","grobid_xml":"https://content.openalex.org/works/W3097566756.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2099471712","https://openalex.org/W2172065531","https://openalex.org/W2502312327","https://openalex.org/W2584032004","https://openalex.org/W2769810959","https://openalex.org/W2788851830","https://openalex.org/W2910577860","https://openalex.org/W2949382160","https://openalex.org/W2950299304","https://openalex.org/W2962974533","https://openalex.org/W2963091184","https://openalex.org/W2963300588","https://openalex.org/W2963685250","https://openalex.org/W2963782041","https://openalex.org/W2970006822","https://openalex.org/W2972562500","https://openalex.org/W2972882294","https://openalex.org/W2975414524","https://openalex.org/W3015338123","https://openalex.org/W3016160783","https://openalex.org/W3046970875","https://openalex.org/W3047334337","https://openalex.org/W3092028330","https://openalex.org/W3097828251","https://openalex.org/W3144035034","https://openalex.org/W6631190155","https://openalex.org/W6695676441","https://openalex.org/W6724804524","https://openalex.org/W6732429163","https://openalex.org/W6748409065","https://openalex.org/W6755257315","https://openalex.org/W6758675244","https://openalex.org/W6767111847","https://openalex.org/W6769767169","https://openalex.org/W6776594914","https://openalex.org/W6781251213","https://openalex.org/W6783867762","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W3161236344","https://openalex.org/W2972562500","https://openalex.org/W2928550135","https://openalex.org/W2912613132","https://openalex.org/W2232843134","https://openalex.org/W3031135612","https://openalex.org/W3199713199","https://openalex.org/W3015338123","https://openalex.org/W3109064156","https://openalex.org/W3173828512","https://openalex.org/W2952917250","https://openalex.org/W2945993525","https://openalex.org/W3160973314","https://openalex.org/W3168043446","https://openalex.org/W3135915893","https://openalex.org/W3164736513","https://openalex.org/W2963954250","https://openalex.org/W2797972631","https://openalex.org/W3156248872","https://openalex.org/W2895807593"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"neural":[3,52,132],"vocoders":[4,133],"have":[5],"surpassed":[6],"classical":[7],"speech":[8,58,91,110],"generation":[9,111],"approaches":[10],"in":[11,41,134],"naturalness":[12],"and":[13,25,36,120,123,136],"perceptual":[14,44],"quality":[15],"of":[16,43,56,78],"the":[17,75,79,90],"synthesized":[18],"speech.":[19,81],"Computationally":[20],"heavy":[21],"models":[22],"like":[23],"WaveNet":[24],"WaveGlow":[26],"achieve":[27],"best":[28],"results,":[29],"while":[30],"lightweight":[31,51],"GAN":[32],"models,":[33],"e.g.":[34],"MelGAN":[35],"Parallel":[37],"WaveGAN,":[38],"remain":[39],"inferior":[40],"terms":[42],"quality.":[45],"We":[46],"therefore":[47],"propose":[48],"StyleMelGAN,":[49],"a":[50,70,95,102],"vocoder":[53],"allowing":[54],"synthesis":[55],"high-fidelity":[57],"with":[59,74,98],"low":[60],"computational":[61],"complexity.":[62],"StyleMelGAN":[63,129],"employs":[64],"temporal":[65],"adaptive":[66],"normalization":[67],"to":[68],"style":[69],"low-dimensional":[71],"noise":[72],"vector":[73],"acoustic":[76],"features":[77],"target":[80],"For":[82],"efficient":[83],"training,":[84],"multiple":[85],"random-window":[86],"discriminators":[87],"adversarially":[88],"evaluate":[89],"signal":[92],"analyzed":[93],"by":[94,101],"filter":[96],"bank,":[97],"regularization":[99],"provided":[100],"multi-scale":[103],"spectral":[104],"reconstruction":[105],"loss.":[106],"The":[107],"highly":[108],"parallelizable":[109],"is":[112],"several":[113],"times":[114],"faster":[115],"than":[116],"real-time":[117],"on":[118],"CPUs":[119],"GPUs.":[121],"MUSHRA":[122],"P.800":[124],"listening":[125],"tests":[126],"show":[127],"that":[128],"outperforms":[130],"prior":[131],"copy-synthesis":[135],"Text-to-Speech":[137],"scenarios.":[138]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
