{"id":"https://openalex.org/W2291027405","doi":"https://doi.org/10.1109/asru.2015.7404791","title":"Stochastic Gradient Variational Bayes for deep learning-based ASR","display_name":"Stochastic Gradient Variational Bayes for deep learning-based ASR","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2291027405","doi":"https://doi.org/10.1109/asru.2015.7404791","mag":"2291027405"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2015.7404791","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2015.7404791","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":true,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":["Faculty of Computer Science, Universitas Indonesia, Indonesia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Universitas Indonesia, Indonesia","institution_ids":["https://openalex.org/I29617571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052754690","display_name":"Mirna Adriani","orcid":null},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Mirna Adriani","raw_affiliation_strings":["Faculty of Computer Science, Universitas Indonesia, Indonesia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Universitas Indonesia, Indonesia","institution_ids":["https://openalex.org/I29617571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038296765"],"corresponding_institution_ids":["https://openalex.org/I29617571"],"apc_list":null,"apc_paid":null,"fwci":1.7258,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.88957403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"12","issue":null,"first_page":"175","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7617123126983643},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7126123905181885},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.6476924419403076},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6427251100540161},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6417771577835083},{"id":"https://openalex.org/keywords/restricted-boltzmann-machine","display_name":"Restricted Boltzmann machine","score":0.5052178502082825},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.5045706033706665},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4877834916114807},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48190954327583313},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4480929970741272},{"id":"https://openalex.org/keywords/boltzmann-machine","display_name":"Boltzmann machine","score":0.44253793358802795},{"id":"https://openalex.org/keywords/deep-belief-network","display_name":"Deep belief network","score":0.4387586712837219},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4380735158920288},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4348365068435669},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4318249523639679},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.3398802876472473},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.24022537469863892}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7617123126983643},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7126123905181885},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.6476924419403076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6427251100540161},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6417771577835083},{"id":"https://openalex.org/C199354608","wikidata":"https://www.wikidata.org/wiki/Q7316287","display_name":"Restricted Boltzmann machine","level":3,"score":0.5052178502082825},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.5045706033706665},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4877834916114807},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48190954327583313},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4480929970741272},{"id":"https://openalex.org/C192576344","wikidata":"https://www.wikidata.org/wiki/Q194706","display_name":"Boltzmann machine","level":3,"score":0.44253793358802795},{"id":"https://openalex.org/C97385483","wikidata":"https://www.wikidata.org/wiki/Q16954980","display_name":"Deep belief network","level":3,"score":0.4387586712837219},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4380735158920288},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4348365068435669},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4318249523639679},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3398802876472473},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.24022537469863892}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2015.7404791","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2015.7404791","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320333293","display_name":"South Dakota Agricultural Experiment Station","ror":null},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320335839","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W189596042","https://openalex.org/W1524333225","https://openalex.org/W1661756259","https://openalex.org/W1691728462","https://openalex.org/W1903355235","https://openalex.org/W1959608418","https://openalex.org/W1984541135","https://openalex.org/W2054658115","https://openalex.org/W2095705004","https://openalex.org/W2108501770","https://openalex.org/W2108677974","https://openalex.org/W2125838338","https://openalex.org/W2136922672","https://openalex.org/W2145094598","https://openalex.org/W2146502635","https://openalex.org/W2152175008","https://openalex.org/W2160815625","https://openalex.org/W2163922914","https://openalex.org/W2165880886","https://openalex.org/W2167270514","https://openalex.org/W2172097686","https://openalex.org/W2405331948","https://openalex.org/W2997574889","https://openalex.org/W6600284362","https://openalex.org/W6607775107","https://openalex.org/W6631362777","https://openalex.org/W6636969168","https://openalex.org/W6637412569","https://openalex.org/W6640963894","https://openalex.org/W6674330103","https://openalex.org/W6675944832","https://openalex.org/W6676315081","https://openalex.org/W6681096077","https://openalex.org/W6681435938","https://openalex.org/W6682825348"],"related_works":["https://openalex.org/W2064630666","https://openalex.org/W3121598771","https://openalex.org/W2287713958","https://openalex.org/W2513801676","https://openalex.org/W2916681395","https://openalex.org/W3010338767","https://openalex.org/W1257380361","https://openalex.org/W3005559199","https://openalex.org/W2133034788","https://openalex.org/W2892911634"],"abstract_inverted_index":{"Many":[0],"successful":[1],"methods":[2],"for":[3,48,97,144],"training":[4,24,58,143],"deep":[5],"neural":[6],"networks":[7],"(DNN)":[8],"rely":[9],"on":[10],"an":[11,92,118],"unsupervised":[12],"pretraining":[13,31,98,166,183],"algorithm.":[14],"It":[15,116],"is":[16,26,56,60,68,87],"particularly":[17],"effective":[18],"when":[19],"the":[20,36,40,54,63,73,78,150,157,189],"number":[21],"of":[22,66,72,94,152],"labeled":[23],"samples":[25],"not":[27],"large":[28],"enough,":[29],"because":[30,62],"method":[32],"helps":[33],"to":[34],"initialize":[35],"parameter":[37,79],"values":[38],"in":[39,141,161],"appropriate":[41],"range":[42],"near":[43],"a":[44,83],"local":[45],"good":[46],"minimum,":[47],"further":[49],"discriminative":[50,142],"finetuning.":[51],"However,":[52],"while":[53],"improvement":[55],"impressive,":[57],"DNN":[59,67],"difficult":[61],"objective":[64],"function":[65,71],"highly":[69],"non-convex":[70],"parameters.":[74],"To":[75],"avoid":[76],"placing":[77],"that":[80,181],"generalizes":[81],"poorly,":[82],"robust":[84],"generative":[85,95],"modelling":[86,96,101],"necessary.":[88],"This":[89],"paper":[90],"explore":[91],"alternative":[93],"DNN-based":[99,153],"acoustic":[100,145,154],"using":[102,156],"Stochastic":[103],"Gradient":[104],"Variational":[105,112],"Bayes":[106,113],"(SGVB)":[107],"within":[108],"autoencoder":[109],"framework":[110],"called":[111],"Autoencoder":[114,176],"(VBAE).":[115],"performs":[117],"efficient":[119],"approximate":[120],"inference":[121],"and":[122,173],"learning":[123],"with":[124,134,163,184],"directed":[125],"probabilistic":[126,131],"graphical":[127],"models.":[128],"During":[129],"fine-tuning,":[130],"encoder":[132],"parameters":[133],"latent":[135,186],"variable":[136],"components":[137],"are":[138],"then":[139],"used":[140,165],"model.":[146],"Here,":[147],"we":[148],"investigate":[149],"performances":[151],"model":[155],"proposed":[158],"pretrained":[159],"VBAE":[160,182],"comparison":[162],"widely":[164],"algorithms":[167],"like":[168],"Restricted":[169],"Boltzmann":[170],"Machine":[171],"(RBM)":[172],"Stacked":[174],"Denoising":[175],"(SDAE).":[177],"The":[178],"results":[179],"reveal":[180],"Gaussian":[185],"variables":[187],"gave":[188],"best":[190],"performance.":[191]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
