{"id":"https://openalex.org/W2750248772","doi":"https://doi.org/10.21437/interspeech.2017-1160","title":"Hidden Markov Model Variational Autoencoder for Acoustic Unit Discovery","display_name":"Hidden Markov Model Variational Autoencoder for Acoustic Unit Discovery","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2750248772","doi":"https://doi.org/10.21437/interspeech.2017-1160","mag":"2750248772"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-1160","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055580486","display_name":"Janek Ebbers","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Janek Ebbers","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006131416","display_name":"Jahn Heymann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jahn Heymann","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011191228","display_name":"Lukas Drude","orcid":"https://orcid.org/0000-0003-3683-5432"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lukas Drude","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034662562","display_name":"Thomas Glarner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas Glarner","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082075598","display_name":"Reinhold Haeb\u2010Umbach","orcid":"https://orcid.org/0000-0001-9468-7330"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reinhold Haeb-Umbach","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5113017615","display_name":"Bhiksha Raj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhiksha Raj","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5055580486"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.7417,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.94628752,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"488","last_page":"492"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8727566599845886},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7621031403541565},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.591765820980072},{"id":"https://openalex.org/keywords/hidden-semi-markov-model","display_name":"Hidden semi-Markov model","score":0.583907425403595},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.49593719840049744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4755235016345978},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.43091264367103577},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4157163202762604},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.41258078813552856},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4013136029243469},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37224531173706055},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2557547986507416},{"id":"https://openalex.org/keywords/variable-order-markov-model","display_name":"Variable-order Markov model","score":0.25509676337242126},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22566664218902588},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.20858576893806458},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12506645917892456}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8727566599845886},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7621031403541565},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.591765820980072},{"id":"https://openalex.org/C64939953","wikidata":"https://www.wikidata.org/wiki/Q3859882","display_name":"Hidden semi-Markov model","level":5,"score":0.583907425403595},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.49593719840049744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4755235016345978},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.43091264367103577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4157163202762604},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.41258078813552856},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4013136029243469},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37224531173706055},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2557547986507416},{"id":"https://openalex.org/C54907487","wikidata":"https://www.wikidata.org/wiki/Q7915688","display_name":"Variable-order Markov model","level":4,"score":0.25509676337242126},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22566664218902588},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.20858576893806458},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12506645917892456},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2017-1160","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1796128977","https://openalex.org/W1971081490","https://openalex.org/W2077804127","https://openalex.org/W2078769636","https://openalex.org/W2100768664","https://openalex.org/W2117041980","https://openalex.org/W2142384583","https://openalex.org/W2159283619","https://openalex.org/W2347098582","https://openalex.org/W2468716020","https://openalex.org/W2556467266","https://openalex.org/W2949416428","https://openalex.org/W2962695963"],"related_works":["https://openalex.org/W1510894296","https://openalex.org/W2316449557","https://openalex.org/W2379938888","https://openalex.org/W4246505579","https://openalex.org/W2134386692","https://openalex.org/W2103649304","https://openalex.org/W3044198794","https://openalex.org/W2116722627","https://openalex.org/W2176285001","https://openalex.org/W2537260108"],"abstract_inverted_index":{"Variational":[0],"Autoencoders":[1],"(VAEs)":[2],"have":[3],"been":[4],"shown":[5],"to":[6,62,113,135],"provide":[7,130],"efficient":[8],"neural-network-based":[9],"approximate":[10],"Bayesian":[11],"inference":[12,19,30,82],"for":[13,16,115],"observation":[14],"models":[15],"which":[17],"exact":[18],"is":[20,121],"intractable.":[21],"Its":[22],"extension,":[23,44],"the":[24,32,59,63,93,96,105,124],"so-called":[25],"Structured":[26],"VAE":[27,48],"(SVAE)":[28],"allows":[29],"in":[31,69,83,123],"presence":[33],"of":[34,65,95],"both":[35],"discrete":[36],"and":[37],"continuous":[38],"latent":[39,55],"variables.":[40],"Inspired":[41],"by":[42,104],"this":[43],"we":[45,110],"developed":[46],"a":[47,70],"with":[49,86],"Hidden":[50],"Markov":[51],"Models":[52],"(HMMs)":[53],"as":[54],"models.":[56],"We":[57],"applied":[58],"resulting":[60],"HMM-VAE":[61],"task":[64,119],"acoustic":[66,97],"unit":[67,98],"discovery":[68,99],"zero":[71],"resource":[72],"scenario.":[73],"Starting":[74],"from":[75],"an":[76,84,116],"initial":[77],"model":[78],"based":[79],"on":[80],"variational":[81],"HMM":[85],"Gaussian":[87],"Mixture":[88],"Model":[89],"(GMM)":[90],"emission":[91],"probabilities,":[92],"accuracy":[94],"could":[100],"be":[101],"significantly":[102],"improved":[103],"HMM-VAE.":[106],"In":[107],"doing":[108],"so":[109],"were":[111],"able":[112],"demonstrate":[114],"unsupervised":[117],"learning":[118,126],"what":[120],"well-known":[122],"supervised":[125],"case:":[127],"Neural":[128],"networks":[129],"superior":[131],"modeling":[132],"power":[133],"compared":[134],"GMMs.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
