{"id":"https://openalex.org/W4210287297","doi":"https://doi.org/10.1109/asru51503.2021.9688194","title":"Multi-Stream HiFi-GAN with Data-Driven Waveform Decomposition","display_name":"Multi-Stream HiFi-GAN with Data-Driven Waveform Decomposition","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210287297","doi":"https://doi.org/10.1109/asru51503.2021.9688194"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688194","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022943637","display_name":"Takuma Okamoto","orcid":"https://orcid.org/0000-0001-9913-4647"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuma Okamoto","raw_affiliation_strings":["National Institute of Information and Communications Technology,Japan","National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology,Japan","institution_ids":["https://openalex.org/I90023481"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]},{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Information Technology Center, Nagoya University,Japan","Information Technology Center, Nagoya University, Japan","National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University,Japan","institution_ids":["https://openalex.org/I60134161"]},{"raw_affiliation_string":"Information Technology Center, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114514387","display_name":"Hisashi Kawai","orcid":"https://orcid.org/0000-0002-0914-5092"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hisashi Kawai","raw_affiliation_strings":["National Institute of Information and Communications Technology,Japan","National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology,Japan","institution_ids":["https://openalex.org/I90023481"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022943637"],"corresponding_institution_ids":["https://openalex.org/I90023481"],"apc_list":null,"apc_paid":null,"fwci":1.2687,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.83989647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"610","last_page":"617"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.7732397317886353},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7688589692115784},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4568532109260559},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44118037819862366},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.42320239543914795},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.4219942092895508},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08932158350944519},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0855223536491394},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.07822868227958679}],"concepts":[{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.7732397317886353},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7688589692115784},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4568532109260559},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44118037819862366},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.42320239543914795},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.4219942092895508},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08932158350944519},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0855223536491394},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.07822868227958679},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688194","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/5","display_name":"Gender equality"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2063678701","https://openalex.org/W2172065531","https://openalex.org/W2476548250","https://openalex.org/W2519091744","https://openalex.org/W2535388113","https://openalex.org/W2747874407","https://openalex.org/W2749651610","https://openalex.org/W2769810959","https://openalex.org/W2785516183","https://openalex.org/W2801291345","https://openalex.org/W2903739847","https://openalex.org/W2908510526","https://openalex.org/W2911708970","https://openalex.org/W2912237252","https://openalex.org/W2952218014","https://openalex.org/W2952809536","https://openalex.org/W2963090522","https://openalex.org/W2963091184","https://openalex.org/W2963300588","https://openalex.org/W2963405869","https://openalex.org/W2963952344","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W2968917279","https://openalex.org/W2970006822","https://openalex.org/W2972789651","https://openalex.org/W2975414524","https://openalex.org/W2996286887","https://openalex.org/W3007859642","https://openalex.org/W3015338123","https://openalex.org/W3015478688","https://openalex.org/W3016160783","https://openalex.org/W3026874504","https://openalex.org/W3033411150","https://openalex.org/W3036167779","https://openalex.org/W3045000780","https://openalex.org/W3081800019","https://openalex.org/W3092028330","https://openalex.org/W3095419383","https://openalex.org/W3096702751","https://openalex.org/W3097538987","https://openalex.org/W3097828251","https://openalex.org/W3099330747","https://openalex.org/W3117914007","https://openalex.org/W3123097577","https://openalex.org/W3129651364","https://openalex.org/W3144035034","https://openalex.org/W3161296985","https://openalex.org/W3161782335","https://openalex.org/W3162075194","https://openalex.org/W3162366763","https://openalex.org/W3165891254","https://openalex.org/W3169905056","https://openalex.org/W3196468212","https://openalex.org/W3197095449","https://openalex.org/W3197273793","https://openalex.org/W3197294703","https://openalex.org/W3198011653","https://openalex.org/W3198769980","https://openalex.org/W3198843334","https://openalex.org/W3198869563","https://openalex.org/W4287372095","https://openalex.org/W4287694050","https://openalex.org/W4287761884","https://openalex.org/W4294619240","https://openalex.org/W4297817572","https://openalex.org/W4298580827","https://openalex.org/W4320013936","https://openalex.org/W6610566761","https://openalex.org/W6631190155","https://openalex.org/W6729060410","https://openalex.org/W6734312481","https://openalex.org/W6741681139","https://openalex.org/W6748409065","https://openalex.org/W6753855596","https://openalex.org/W6755257315","https://openalex.org/W6757585730","https://openalex.org/W6757817989","https://openalex.org/W6767111847","https://openalex.org/W6767164110","https://openalex.org/W6777694618","https://openalex.org/W6778823374","https://openalex.org/W6779337556","https://openalex.org/W6779823529","https://openalex.org/W6781251213","https://openalex.org/W6782760101","https://openalex.org/W6783182287","https://openalex.org/W6783867762","https://openalex.org/W6789403026","https://openalex.org/W6796464841"],"related_works":["https://openalex.org/W1974895211","https://openalex.org/W2129841057","https://openalex.org/W3040712279","https://openalex.org/W2176409448","https://openalex.org/W2364769705","https://openalex.org/W2056136368","https://openalex.org/W2374664672","https://openalex.org/W4367555392","https://openalex.org/W2538520412","https://openalex.org/W2039489009"],"abstract_inverted_index":{"Although":[0],"a":[1,16,32,78,104,122],"HiFi-GAN":[2,63,112,152],"vocoder":[3],"can":[4,153],"synthesize":[5],"high-fidelity":[6],"speech":[7,101],"waveforms":[8,102],"in":[9,66,72,103,163],"real":[10],"time":[11],"on":[12,120,127],"CPUs,":[13,121],"there":[14],"is":[15,35,75],"tradeoff":[17],"between":[18],"synthesis":[19,30,70,97,155,161,174],"quality":[20,162],"and":[21,62,134,147,165,171],"inference":[22,26],"speed.":[23],"To":[24,107],"increase":[25,154],"speed":[27,156],"while":[28,157],"maintaining":[29,160],"quality,":[31],"multi-band":[33,54],"structure":[34],"introduced":[36],"to":[37,88,99],"HiFi-GAN.":[38,182],"However,":[39],"it":[40],"cannot":[41],"be":[42],"trained":[43],"well":[44],"because":[45],"of":[46,142],"the":[47,52,68,83,91,95,109,180],"strong":[48],"constraint":[49],"imposed":[50],"by":[51,77],"fixed":[53,69],"structure.":[55,85],"As":[56],"an":[57,114],"alternative":[58],"approach,":[59],"Multi-stream":[60,111,151],"MelGAN":[61,74],"are":[64],"proposed,":[65],"which":[67],"filter":[71,98],"Multi-band":[73,89],"replaced":[76],"trainable":[79,96],"convolutional":[80],"layer":[81],"with":[82,131,179],"same":[84],"In":[86],"contrast":[87],"MelGAN,":[90],"proposed":[92,110],"methods":[93],"use":[94],"decompose":[100],"data-driven":[105],"manner.":[106],"evaluate":[108],"as":[113],"entire":[115],"real-time":[116],"neural":[117],"text-to-speech":[118,166],"system":[119],"fast":[123],"acoustic":[124],"model,":[125],"based":[126],"Parallel":[128],"Tacotron":[129],"2":[130],"forced":[132],"alignment":[133],"accentual":[135],"label":[136],"input,":[137],"was":[138],"implemented.":[139],"The":[140],"results":[141],"experiments-using":[143],"Japanese":[144],"male,":[145],"female,":[146],"multi-speaker":[148,176],"corpora-indicate":[149],"that":[150],"improving":[158],"or":[159],"analysis-synthesis":[164],"conditions":[167],"for":[168,175],"single-speaker":[169],"models":[170],"unseen":[172],"speaker":[173],"models,":[177],"compared":[178],"original":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2025-10-10T00:00:00"}
