{"id":"https://openalex.org/W3188073270","doi":"https://doi.org/10.1109/waspaa52581.2021.9632750","title":"A Streamwise Gan Vocoder for Wideband Speech Coding at Very Low Bit Rate","display_name":"A Streamwise Gan Vocoder for Wideband Speech Coding at Very Low Bit Rate","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3188073270","doi":"https://doi.org/10.1109/waspaa52581.2021.9632750","mag":"3188073270"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa52581.2021.9632750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632750","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110975574","display_name":"Ahmed Mustafa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Ahmed Mustafa","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042244085","display_name":"Jan B\u00fcthe","orcid":"https://orcid.org/0000-0001-5872-4325"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Buthe","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077147264","display_name":"Srikanth Korse","orcid":"https://orcid.org/0009-0008-7564-9628"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Srikanth Korse","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048846474","display_name":"Kishan Gupta","orcid":"https://orcid.org/0000-0002-4354-0758"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Kishan Gupta","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054985419","display_name":"Guillaume Fuchs","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Guillaume Fuchs","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039716073","display_name":"Nicola Pia","orcid":"https://orcid.org/0000-0003-0987-863X"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nicola Pia","raw_affiliation_strings":["Fraunhofer IIS, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer IIS, Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5110975574"],"corresponding_institution_ids":["https://openalex.org/I4210124274"],"apc_list":null,"apc_paid":null,"fwci":2.1603,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.8823962,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"66","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8348376154899597},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.752787172794342},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.6773605942726135},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.5727885961532593},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5461500883102417},{"id":"https://openalex.org/keywords/codec2","display_name":"Codec2","score":0.44610682129859924},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0713123083114624}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8348376154899597},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.752787172794342},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.6773605942726135},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.5727885961532593},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5461500883102417},{"id":"https://openalex.org/C75217168","wikidata":"https://www.wikidata.org/wiki/Q1105653","display_name":"Codec2","level":4,"score":0.44610682129859924},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0713123083114624},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/waspaa52581.2021.9632750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632750","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/504754","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/504754","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.8199999928474426,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2099471712","https://openalex.org/W2103934944","https://openalex.org/W2141998673","https://openalex.org/W2172065531","https://openalex.org/W2502312327","https://openalex.org/W2519091744","https://openalex.org/W2527729766","https://openalex.org/W2775336875","https://openalex.org/W2949382160","https://openalex.org/W2963091184","https://openalex.org/W2963208781","https://openalex.org/W2963300588","https://openalex.org/W2964121744","https://openalex.org/W2964307104","https://openalex.org/W2970006822","https://openalex.org/W2972519044","https://openalex.org/W2996286887","https://openalex.org/W3015338123","https://openalex.org/W3025581723","https://openalex.org/W3036682213","https://openalex.org/W3092028330","https://openalex.org/W3097538987","https://openalex.org/W3098403858","https://openalex.org/W3103781353","https://openalex.org/W3144035034","https://openalex.org/W3160077247","https://openalex.org/W3161236344","https://openalex.org/W3163662330","https://openalex.org/W4298580827","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6675521023","https://openalex.org/W6748409065","https://openalex.org/W6755853340","https://openalex.org/W6760712927","https://openalex.org/W6767111847","https://openalex.org/W6768435317","https://openalex.org/W6780365925","https://openalex.org/W6783867762"],"related_works":["https://openalex.org/W2509918616","https://openalex.org/W2060409740","https://openalex.org/W2010376816","https://openalex.org/W1911859126","https://openalex.org/W1542588102","https://openalex.org/W1000239413","https://openalex.org/W1570840316","https://openalex.org/W2139283974","https://openalex.org/W2156505556","https://openalex.org/W2120730869"],"abstract_inverted_index":{"Recently,":[0],"GAN":[1,49],"vocoders":[2,25,100],"have":[3],"seen":[4],"rapid":[5],"progress":[6],"in":[7,15,79,124],"speech":[8,35,57,108,137,160],"synthesis,":[9],"starting":[10],"to":[11,54,135],"outperform":[12],"autoregressive":[13,24,99],"models":[14,155],"perceptual":[16],"quality":[17,133],"with":[18,110],"much":[19],"higher":[20],"generation":[21,33],"speed.":[22],"However,":[23],"are":[26],"still":[27],"the":[28,73,93,122],"common":[29],"choice":[30],"for":[31,85,103,156],"neural":[32],"of":[34,72,113,121,152],"signals":[36],"coded":[37,61],"at":[38,62,142],"very":[39,104],"low":[40,105,157],"bit":[41,106,158],"rates.":[42],"In":[43],"this":[44,125,128],"paper,":[45],"we":[46],"present":[47],"a":[48,69,118],"vocoder":[50,75,131],"which":[51,148],"is":[52,68],"able":[53],"generate":[55],"wideband":[56],"waveforms":[58],"from":[59],"parameters":[60],"1.6":[63],"kbit/s.":[64],"The":[65,88],"proposed":[66,94],"model":[67,95],"modified":[70],"version":[71],"StyleMelGAN":[74],"that":[76,92],"can":[77],"run":[78],"frame-by-frame":[80],"manner,":[81],"making":[82],"it":[83],"suitable":[84],"streaming":[86],"applications.":[87],"experimental":[89],"results":[90],"show":[91],"significantly":[96],"outperforms":[97],"prior":[98],"like":[101],"LPC-Net":[102],"rate":[107,159],"coding,":[109],"computational":[111],"complexity":[112],"about":[114],"5":[115],"GMACs,":[116],"providing":[117],"new":[119],"state":[120],"art":[123],"domain.":[126],"Moreover,":[127],"streamwise":[129],"adversarial":[130],"delivers":[132],"competitive":[134],"advanced":[136],"codecs":[138],"such":[139],"as":[140],"EVS":[141],"5.9":[143],"kbit/s":[144],"on":[145],"clean":[146],"speech,":[147],"motivates":[149],"further":[150],"usage":[151],"feedforward":[153],"fully-convolutional":[154],"coding.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
