{"id":"https://openalex.org/W3096080837","doi":"https://doi.org/10.21437/interspeech.2020-1613","title":"Reverberation Modeling for Source-Filter-Based Neural Vocoder","display_name":"Reverberation Modeling for Source-Filter-Based Neural Vocoder","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3096080837","doi":"https://doi.org/10.21437/interspeech.2020-1613","mag":"3096080837"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1613","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang Ai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5045907056"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7577,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.7158657,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3560","last_page":"3564"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11698","display_name":"Underwater Acoustics Research","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.9371042251586914},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.776954174041748},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6291346549987793},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.5391848683357239},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5354637503623962},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4802737236022949},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.4519602060317993},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.4409833550453186},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.3129352331161499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30643951892852783},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14305683970451355},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.08985510468482971},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0781978964805603}],"concepts":[{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.9371042251586914},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.776954174041748},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6291346549987793},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.5391848683357239},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5354637503623962},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4802737236022949},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.4519602060317993},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.4409833550453186},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3129352331161499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30643951892852783},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14305683970451355},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.08985510468482971},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0781978964805603},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1613","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W1656519308","https://openalex.org/W2944572343","https://openalex.org/W2333799855","https://openalex.org/W2351687372","https://openalex.org/W2042717753"],"abstract_inverted_index":{"This":[0,18],"paper":[1],"presents":[2],"a":[3,32,40,59,73,118,166],"reverberation":[4,109,138,162,169],"module":[5,19,110,132],"for":[6,135],"source-filter-based":[7],"neural":[8,25,98],"vocoders":[9,26],"that":[10,129],"improves":[11],"the":[12,21,37,53,67,70,102,107,112,124,130,137,142,158],"performance":[13],"of":[14,24,69,117,145],"reverberant":[15,33,147],"effect":[16,139],"modeling.":[17],"uses":[20,96],"output":[22],"waveform":[23,34],"as":[27],"an":[28,80],"input":[29,38],"and":[30,51,64,95,121,140,164],"produces":[31],"by":[35],"convolving":[36],"with":[39],"room":[41],"impulse":[42],"response":[43],"(RIR).":[44],"We":[45,105],"propose":[46],"two":[47],"approaches":[48],"to":[49,100,111,153,160],"parameterizing":[50],"estimating":[52],"RIR.":[54],"The":[55,76,149],"first":[56],"approach":[57,78],"assumes":[58,79],"global":[60],"time-invariant":[61],"(GTI)":[62],"RIR":[63,71,103],"directly":[65],"learns":[66],"values":[68],"on":[72],"training":[74],"dataset.":[75],"second":[77],"utterance-level":[81],"time-variant":[82],"(UTV)":[83],"RIR,":[84],"which":[85],"is":[86],"invariant":[87],"within":[88],"one":[89],"utterance":[90],"but":[91],"varies":[92],"across":[93],"utterances,":[94],"another":[97],"network":[99],"predict":[101],"values.":[104],"add":[106],"proposed":[108,131],"phase":[113],"spectrum":[114],"predictor":[115],"(PSP)":[116],"HiNet":[119],"vocoder":[120],"jointly":[122],"train":[123],"model.":[125],"Experimental":[126],"results":[127],"demonstrate":[128],"was":[133,151],"helpful":[134],"modeling":[136],"improving":[141],"perceived":[143],"quality":[144],"generated":[146],"speech.":[148],"UTV-RIR":[150],"shown":[152],"be":[154],"more":[155],"robust":[156],"than":[157],"GTI-RIR":[159],"unknown":[161],"conditions":[163],"achieved":[165],"perceptually":[167],"better":[168],"effect.":[170]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
