{"id":"https://openalex.org/W2787426069","doi":"https://doi.org/10.1109/asru.2017.8269012","title":"Composite embedding systems for ZeroSpeech2017 Track1","display_name":"Composite embedding systems for ZeroSpeech2017 Track1","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2787426069","doi":"https://doi.org/10.1109/asru.2017.8269012","mag":"2787426069"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2017.8269012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8269012","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003049061","display_name":"Hayato Shibata","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hayato Shibata","raw_affiliation_strings":["Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103543327","display_name":"Taku Kato","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Taku Kato","raw_affiliation_strings":["Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103015161","display_name":"Takahiro Shinozaki","orcid":"https://orcid.org/0000-0001-8114-8450"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takahiro Shinozaki","raw_affiliation_strings":["Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, School of Engineering, Kanagawa, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085092659","display_name":"Shinji Watanabet","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabet","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories, MA, USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5003049061"],"corresponding_institution_ids":["https://openalex.org/I114531698"],"apc_list":null,"apc_paid":null,"fwci":1.3651,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.86521316,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"747","last_page":"753"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7441662549972534},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6877422332763672},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6691429615020752},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5810277462005615},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5297877192497253},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.49957704544067383},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4895997643470764},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.479909747838974},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.45374566316604614},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4373423457145691},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.41291895508766174},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3765029013156891},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3316074013710022}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7441662549972534},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6877422332763672},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6691429615020752},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5810277462005615},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5297877192497253},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.49957704544067383},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4895997643470764},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.479909747838974},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.45374566316604614},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4373423457145691},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.41291895508766174},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3765029013156891},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3316074013710022},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2017.8269012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8269012","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1545920196","https://openalex.org/W1686810756","https://openalex.org/W1801780804","https://openalex.org/W1942713348","https://openalex.org/W2033436836","https://openalex.org/W2079460648","https://openalex.org/W2085628288","https://openalex.org/W2114347655","https://openalex.org/W2142390309","https://openalex.org/W2148154194","https://openalex.org/W2286443923","https://openalex.org/W2288471100","https://openalex.org/W2396043527","https://openalex.org/W2399576818","https://openalex.org/W2402741009","https://openalex.org/W2515119768","https://openalex.org/W2586327937","https://openalex.org/W2627092829","https://openalex.org/W2747414243","https://openalex.org/W2786608204","https://openalex.org/W2963620343","https://openalex.org/W6640777149","https://openalex.org/W6695606915","https://openalex.org/W6712202099","https://openalex.org/W6712553779","https://openalex.org/W6712807034","https://openalex.org/W6973666849"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2145836866","https://openalex.org/W2916997151","https://openalex.org/W2949174760"],"abstract_inverted_index":{"This":[0,69],"paper":[1],"investigates":[2],"novel":[3],"composite":[4,188],"embedding":[5],"systems":[6],"for":[7],"language-independent":[8],"high-performance":[9],"feature":[10],"extraction":[11],"using":[12,77,127],"triphone-based":[13],"DNN-HMM":[14,22],"and":[15,65,93],"character-based":[16],"end-to-end":[17,51,83,155,193],"speech":[18,75],"recognition":[19],"systems.":[20],"The":[21,50,148,170,181],"is":[23,54,71,115],"trained":[24,72],"with":[25,47,73,157,195],"phoneme":[26],"transcripts":[27,79],"based":[28,55],"on":[29,56],"a":[30,57,81,110,140,160,178],"large-scale":[31],"Japanese":[32,45],"ASR":[33,52],"recipe":[34],"included":[35],"in":[36,80,121,139],"the":[37,41,99,128,133,145,153,166,192,196],"Kaldi":[38],"toolkit":[39],"from":[40,98,152],"Corpus":[42],"of":[43,61,105,144],"Spontaneous":[44],"(CSJ)":[46],"some":[48],"modifications.":[49],"system":[53,156],"hybrid":[58],"architecture":[59],"consisting":[60],"an":[62,122,174],"attention-based":[63],"encoder-decoder":[64],"connectionist":[66],"temporal":[67],"classification.":[68],"model":[70],"multi-language":[74],"data":[76],"character":[78],"pure":[82],"fashion":[84],"without":[85],"requiring":[86],"phonemic":[87],"representation.":[88],"Posterior":[89],"features,":[90,92],"PCA-transformed":[91],"bottleneck":[94,136,168,198],"features":[95,106,137,189,194],"are":[96,107],"extracted":[97,151],"two":[100],"systems;":[101],"then,":[102],"various":[103],"combinations":[104],"explored.":[108],"Additionally,":[109],"bypassed":[111,171],"autoencoder":[112],"(bypassed":[113],"AE)":[114],"proposed":[116],"to":[117],"normalize":[118],"speaker":[119],"characteristics":[120],"unsupervised":[123],"manner.":[124],"An":[125],"evaluation":[126],"ABX":[129],"test":[130],"showed":[131],"that":[132,190],"DNN-HMM-based":[134],"CSJ":[135,167,197],"resulted":[138],"good":[141],"performance":[142,163,176],"regardless":[143],"input":[146],"language.":[147],"pre-activation":[149],"vectors":[150],"multilingual":[154],"PCA":[158],"provided":[159],"somewhat":[161],"better":[162],"than":[164],"did":[165],"features.":[169,199],"AE":[172],"yielded":[173],"improved":[175],"over":[177],"baseline":[179],"AE.":[180],"lowest":[182],"error":[183],"rates":[184],"were":[185],"obtained":[186],"by":[187],"concatenated":[191]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
