{"id":"https://openalex.org/W2939776061","doi":"https://doi.org/10.1109/icassp.2019.8682520","title":"Deep Variational Filter Learning Models for Speech Recognition","display_name":"Deep Variational Filter Learning Models for Speech Recognition","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2939776061","doi":"https://doi.org/10.1109/icassp.2019.8682520","mag":"2939776061"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050546237","display_name":"Purvi Agrawal","orcid":"https://orcid.org/0000-0002-1165-8348"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Purvi Agrawal","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns (LEAP) lab, Electrical Engineering, Indian Institute of Science, Bangalore, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns (LEAP) lab, Electrical Engineering, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002536077","display_name":"Sriram Ganapathy","orcid":"https://orcid.org/0000-0002-5779-9066"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sriram Ganapathy","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns (LEAP) lab, Electrical Engineering, Indian Institute of Science, Bangalore, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns (LEAP) lab, Electrical Engineering, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.8349,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7211638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"1","issue":null,"first_page":"5731","last_page":"5735"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7814990282058716},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6924600601196289},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6468480825424194},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6211509108543396},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.5784502625465393},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5059327483177185},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.48802462220191956},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48070985078811646},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.45086580514907837},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.4343544542789459},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.39181268215179443},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10926923155784607}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7814990282058716},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6924600601196289},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6468480825424194},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6211509108543396},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.5784502625465393},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5059327483177185},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.48802462220191956},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48070985078811646},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.45086580514907837},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.4343544542789459},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.39181268215179443},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10926923155784607},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C107038049","wikidata":"https://www.wikidata.org/wiki/Q35986","display_name":"Aesthetics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp.2019.8682520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai::78377","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401702","display_name":"Universitas Pasundan institutional repositories & scientific journals (Universitas Pasundan)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210159629","host_organization_name":"Universitas Pasundan","host_organization_lineage":["https://openalex.org/I4210159629"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W154677192","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1541452272","https://openalex.org/W1627087495","https://openalex.org/W1903029394","https://openalex.org/W1959608418","https://openalex.org/W1965248225","https://openalex.org/W1992475611","https://openalex.org/W2054139811","https://openalex.org/W2062164080","https://openalex.org/W2068359377","https://openalex.org/W2096051479","https://openalex.org/W2114719288","https://openalex.org/W2130426352","https://openalex.org/W2136655611","https://openalex.org/W2137075158","https://openalex.org/W2159373586","https://openalex.org/W2289394825","https://openalex.org/W2400622930","https://openalex.org/W2745441477","https://openalex.org/W2756577849","https://openalex.org/W2889087444","https://openalex.org/W2964121744","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6632326908","https://openalex.org/W6636776493","https://openalex.org/W6640963894","https://openalex.org/W6679997575","https://openalex.org/W6713160670"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W3013693939","https://openalex.org/W2088854863","https://openalex.org/W2159052453","https://openalex.org/W2669956259","https://openalex.org/W4249005693","https://openalex.org/W4392946183","https://openalex.org/W3088732000"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,42],"novel":[3],"approach":[4,26],"to":[5,47,89],"derive":[6],"robust":[7,134],"speech":[8,12,33],"representations":[9],"for":[10,94,121,167],"automatic":[11],"recognition":[13],"(ASR)":[14],"systems.":[15],"The":[16,84,97,116],"proposed":[17,123,162],"method":[18],"uses":[19],"an":[20],"unsupervised":[21],"data-driven":[22],"modulation":[23,49,67,74],"filter":[24],"learning":[25,63],"that":[27],"preserves":[28],"the":[29,62,70,91,122,127,158,161,195],"key":[30],"modulations":[31],"of":[32,64,81,139,160,170,175,184,194],"signal":[34],"in":[35,69,141],"spectro-temporal":[36],"domain.":[37],"This":[38],"is":[39,164],"achieved":[40],"by":[41],"deep":[43],"generative":[44],"modeling":[45],"framework":[46],"learn":[48],"filters":[50,68,86],"using":[51,76],"convolutional":[52],"variational":[53],"autoencoder":[54],"(CVAE).":[55],"A":[56],"skip":[57],"connection":[58],"based":[59],"CVAE":[60,124],"enables":[61],"multiple":[65],"irredundant":[66],"time":[71],"and":[72,78,109,151],"frequency":[73],"domain":[75],"temporal":[77],"spectral":[79],"trajectories":[80],"input":[82],"spectrograms.":[83],"learnt":[85],"are":[87,100,179],"used":[88],"process":[90],"spectrogram":[92],"features":[93,129,147,163,188],"ASR":[95,98,171],"training.":[96],"experiments":[99],"performed":[101],"on":[102,148,153,189],"Aurora-4":[103,149,190],"(additive":[104,111],"noise":[105,112],"with":[106,113,192],"channel":[107],"artifact)":[108],"CHiME-3":[110,154],"reverberation)":[114],"databases.":[115],"results":[117],"show":[118],"significant":[119],"improvements":[120,138,183],"model":[125],"over":[126,145,186],"baseline":[128,146,187],"as":[130,132],"well":[131],"other":[133],"front-ends":[135],"(average":[136,181],"relative":[137,182],"9%":[140],"word":[142],"error":[143],"rate":[144],"database":[150,191],"23%":[152],"database).":[155],"In":[156],"addition,":[157],"performance":[159],"highly":[165],"beneficial":[166],"semi-supervised":[168],"training":[169,177,197],"when":[172],"reduced":[173],"amounts":[174],"labeled":[176,196],"data":[178],"available":[180],"29%":[185],"30%":[193],"data).":[198]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
