{"id":"https://openalex.org/W2942544643","doi":"https://doi.org/10.1109/jstsp.2019.2913965","title":"Modulation Filter Learning Using Deep Variational Networks for Robust Speech Recognition","display_name":"Modulation Filter Learning Using Deep Variational Networks for Robust Speech Recognition","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2942544643","doi":"https://doi.org/10.1109/jstsp.2019.2913965","mag":"2942544643"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2019.2913965","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2019.2913965","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050546237","display_name":"Purvi Agrawal","orcid":"https://orcid.org/0000-0002-1165-8348"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Purvi Agrawal","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns Lab, Department of Electrical Engineering, Indian Institute of Science, Bangalore, India"],"raw_orcid":"https://orcid.org/0000-0002-1165-8348","affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns Lab, Department of Electrical Engineering, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002536077","display_name":"Sriram Ganapathy","orcid":"https://orcid.org/0000-0002-5779-9066"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sriram Ganapathy","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns Lab, Department of Electrical Engineering, Indian Institute of Science, Bangalore, India"],"raw_orcid":"https://orcid.org/0000-0002-5779-9066","affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns Lab, Department of Electrical Engineering, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":4.3414,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.95234037,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"13","issue":"2","first_page":"244","last_page":"253"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7426109910011292},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7185380458831787},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7071341872215271},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.6760875582695007},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6288900375366211},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5617222189903259},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5411306023597717},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5362517237663269},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5266119241714478},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.476335346698761},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4747738540172577},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.4660906195640564},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4232734143733978},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.21046346426010132},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.13853734731674194},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.12247684597969055},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07696607708930969}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7426109910011292},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7185380458831787},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7071341872215271},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.6760875582695007},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6288900375366211},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5617222189903259},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5411306023597717},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5362517237663269},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5266119241714478},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.476335346698761},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4747738540172577},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.4660906195640564},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4232734143733978},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.21046346426010132},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.13853734731674194},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.12247684597969055},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07696607708930969},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/jstsp.2019.2913965","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2019.2913965","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:eprints.iisc.ac.in:62880","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196309","display_name":"NOT FOUND REPOSITORY (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5099999904632568,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320768","display_name":"Department of Atomic Energy, Government of India","ror":"https://ror.org/02m388s04"},{"id":"https://openalex.org/F4320326272","display_name":"Department of Science and Technology, Philippines","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W130921173","https://openalex.org/W154677192","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1965248225","https://openalex.org/W1969851134","https://openalex.org/W1974932989","https://openalex.org/W1989314204","https://openalex.org/W1992475611","https://openalex.org/W1999686891","https://openalex.org/W2023262923","https://openalex.org/W2037783071","https://openalex.org/W2050752817","https://openalex.org/W2054139811","https://openalex.org/W2062164080","https://openalex.org/W2064675550","https://openalex.org/W2088137865","https://openalex.org/W2096051479","https://openalex.org/W2100495367","https://openalex.org/W2107223151","https://openalex.org/W2108548422","https://openalex.org/W2114719288","https://openalex.org/W2121903223","https://openalex.org/W2128653836","https://openalex.org/W2130426352","https://openalex.org/W2136655611","https://openalex.org/W2137075158","https://openalex.org/W2141411743","https://openalex.org/W2148613904","https://openalex.org/W2159373586","https://openalex.org/W2160815625","https://openalex.org/W2164502538","https://openalex.org/W2289394825","https://openalex.org/W2394873997","https://openalex.org/W2400622930","https://openalex.org/W2514656709","https://openalex.org/W2582082042","https://openalex.org/W2586756136","https://openalex.org/W2729190387","https://openalex.org/W2745441477","https://openalex.org/W2756577849","https://openalex.org/W2889087444","https://openalex.org/W2951326654","https://openalex.org/W2962896155","https://openalex.org/W2963175699","https://openalex.org/W4320013936","https://openalex.org/W4391602018","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6640963894","https://openalex.org/W6678353412","https://openalex.org/W6678409544","https://openalex.org/W6679997575","https://openalex.org/W6713160670","https://openalex.org/W6727654133"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W3013693939","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2566616303","https://openalex.org/W2088854863","https://openalex.org/W2159052453","https://openalex.org/W3131327266","https://openalex.org/W4402568167","https://openalex.org/W2734887215"],"abstract_inverted_index":{"The":[0,26,107],"performance":[1],"of":[2,13,28,37,142,145,188,212,232],"a":[3,66,88,140],"typical":[4],"speech":[5,39,53,105,130,134,218],"recognition":[6,131,135,219],"system":[7,237],"is":[8,207],"degraded":[9],"in":[10,77,87,98,117],"the":[11,21,34,38,49,83,95,99,103,113,118,126,164,174,193,197,204,235],"presence":[12],"extrinsic":[14],"sources":[15],"like":[16,24],"noise":[17,46,147,157,182],"and":[18,155,190,199],"due":[19],"to":[20,32,45,124,210],"recording":[22],"artifacts":[23,150],"reverberation.":[25],"principle":[27],"modulation":[29,59,70,109,166],"filtering":[30],"attempts":[31],"remove":[33],"spectro-temporal":[35,120],"modulations":[36,51],"signal":[40],"that":[41,61],"are":[42,62,122,137],"more":[43],"susceptible":[44],"while":[47],"preserving":[48],"key":[50],"for":[52,69,129,215],"recognition.":[54],"While":[55],"traditional":[56],"approaches":[57],"use":[58],"filters":[60,97],"hand-crafted,":[63],"we":[64,81,226],"propose":[65],"novel":[67],"method":[68,206],"filter":[71,84,167],"learning":[72,85,168],"using":[73,112,238],"deep":[74,89,114],"variational":[75,100,115],"models":[76],"this":[78],"paper.":[79],"Specifically,":[80],"pose":[82],"problem":[86],"unsupervised":[90],"generative":[91],"modeling":[92],"framework":[93,169],"where":[94],"convolutional":[96],"autoencoder":[101],"capture":[102],"important":[104],"modulations.":[106],"two-dimensional":[108],"filters,":[110],"learned":[111],"networks":[116],"joint":[119],"domain,":[121],"used":[123],"process":[125],"spectrogram":[127],"features":[128,176,195],"task.":[132],"Several":[133],"experiments":[136],"performed":[138],"on":[139,196,223],"set":[141],"tasks":[143],"consisting":[144],"additive":[146,156],"with":[148,158],"channel":[149],"(Aurora-4),":[151],"reverberation":[152,159],"(REVERB":[153],"Challenge),":[154],"(CHiME-3).":[160],"In":[161],"these":[162],"experiments,":[163],"proposed":[165,205],"shows":[170],"significant":[171],"improvements":[172,187],"over":[173,192,234],"baseline":[175,194,236],"as":[177,179],"well":[178],"various":[180],"other":[181],"robust":[183],"front-ends":[184],"(average":[185],"relative":[186,230],"7.5%":[189],"20%":[191],"Aurora-4":[198,224],"CHiME-3":[200],"databases":[201],"respectively).":[202],"Furthermore,":[203],"also":[208],"shown":[209],"be":[211],"considerable":[213],"benefit":[214],"semi-supervised":[216],"automatic":[217],"applications.":[220],"For":[221],"example,":[222],"database":[225],"observe":[227],"an":[228],"average":[229],"improvement":[231],"25%":[233],"30%":[239],"labeled":[240],"training":[241],"data.":[242]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
