{"id":"https://openalex.org/W2811189061","doi":"https://doi.org/10.1109/fskd.2017.8393381","title":"Fast variable-frame-rate decoding of speech recognition based on deep neural networks","display_name":"Fast variable-frame-rate decoding of speech recognition based on deep neural networks","publication_year":2017,"publication_date":"2017-07-01","ids":{"openalex":"https://openalex.org/W2811189061","doi":"https://doi.org/10.1109/fskd.2017.8393381","mag":"2811189061"},"language":"en","primary_location":{"id":"doi:10.1109/fskd.2017.8393381","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fskd.2017.8393381","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044458831","display_name":"Ge Zhang","orcid":"https://orcid.org/0000-0003-4008-9962"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ge Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036446253","display_name":"Pengyuan Zhang","orcid":"https://orcid.org/0000-0001-6838-5160"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyuan Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111847664","display_name":"Jielin Pan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jielin Pan","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100425112","display_name":"Yonghong Yan","orcid":"https://orcid.org/0000-0001-6907-5770"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yonghong Yan","raw_affiliation_strings":["Xinjiang Laboratory of Minority Speech and Language Information Processing, Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Xinjiang Laboratory of Minority Speech and Language Information Processing, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044458831"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23662644,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"821","last_page":"825"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8230988383293152},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7962855100631714},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.7089925408363342},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6701208353042603},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5949305295944214},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5581324696540833},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5348407626152039},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5068641304969788},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4584670960903168},{"id":"https://openalex.org/keywords/time-delay-neural-network","display_name":"Time delay neural network","score":0.4485055208206177},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.4258320927619934},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.4200108051300049},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38062113523483276},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19631987810134888},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09874671697616577},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08168083429336548}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8230988383293152},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7962855100631714},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.7089925408363342},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6701208353042603},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5949305295944214},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5581324696540833},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5348407626152039},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5068641304969788},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4584670960903168},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.4485055208206177},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.4258320927619934},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.4200108051300049},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38062113523483276},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19631987810134888},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09874671697616577},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08168083429336548},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fskd.2017.8393381","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fskd.2017.8393381","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1526236009","https://openalex.org/W1912497050","https://openalex.org/W2046932483","https://openalex.org/W2105594594","https://openalex.org/W2110707300","https://openalex.org/W2112175245","https://openalex.org/W2132991150","https://openalex.org/W2293634267","https://openalex.org/W2295757520","https://openalex.org/W2394932179","https://openalex.org/W2402146185","https://openalex.org/W2507132449","https://openalex.org/W3217546843","https://openalex.org/W4297662257","https://openalex.org/W6712930963"],"related_works":["https://openalex.org/W3203142394","https://openalex.org/W4302615923","https://openalex.org/W1974101135","https://openalex.org/W2351061015","https://openalex.org/W2349784553","https://openalex.org/W2017509870","https://openalex.org/W4220731478","https://openalex.org/W3022596247","https://openalex.org/W2542937328","https://openalex.org/W4251141768"],"abstract_inverted_index":{"Deep":[0],"neural":[1,28,61,74,108],"networks":[2,29,62],"(DNN)":[3],"have":[4],"recently":[5],"shown":[6],"impressive":[7],"performance":[8],"as":[9],"acoustic":[10,65],"models":[11],"for":[12,116],"large":[13],"vocabulary":[14],"continuous":[15],"speech":[16,117],"recognition":[17,118],"(LVCSR)":[18],"tasks.":[19],"Typically,":[20],"the":[21,25,34,38,42,57,79,85,98,113,126,140,143,173],"frame":[22],"shift":[23],"of":[24,27,37,45,60,70,73,81,88,122,128,148,169],"output":[26],"is":[30],"much":[31,156],"shorter":[32],"than":[33,63],"average":[35],"length":[36],"modeling":[39],"units,":[40],"so":[41],"posterior":[43,82,129,136],"vectors":[44,130],"neighbouring":[46],"frames":[47,72,133,152],"are":[48],"likely":[49],"to":[50,78,139,172],"be":[51,92],"similar.":[52],"The":[53],"similarity,":[54],"together":[55],"with":[56,119,134],"better":[58],"discrimination":[59],"typical":[64,174],"models,":[66],"shows":[67],"a":[68,101,135,155,165],"possibility":[69],"removing":[71,132],"network":[75,109],"outputs":[76],"according":[77],"distance":[80],"vectors.":[83],"Then,":[84],"computation":[86,110],"costs":[87],"beam":[89,114,158],"searching":[90,115],"can":[91,145],"effectively":[93],"reduced.":[94],"Based":[95],"on":[96,107,161],"that,":[97],"paper":[99],"introduces":[100],"novel":[102],"variable-frame-rate":[103],"decoding":[104,170,176],"approach":[105,144],"based":[106],"that":[111],"accelerates":[112],"minor":[120],"loss":[121],"accuracy.":[123],"By":[124],"computing":[125],"distances":[127],"and":[131,153],"vector":[137],"similar":[138],"previous":[141],"frame,":[142],"make":[146],"use":[147],"redundant":[149],"information":[150],"between":[151],"do":[154],"quicker":[157],"searching.":[159],"Experiments":[160],"LVCSR":[162],"tasks":[163],"show":[164],"2.4-times":[166],"speed":[167],"up":[168],"compared":[171],"framewise":[175],"implementation.":[177]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
