{"id":"https://openalex.org/W3011207290","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023133","title":"Exploring RNN-Transducer for Chinese speech recognition","display_name":"Exploring RNN-Transducer for Chinese speech recognition","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3011207290","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023133","mag":"3011207290"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023133","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023133","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102298615","display_name":"Senmao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Senmao Wang","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693197","display_name":"Pan Zhou","orcid":"https://orcid.org/0000-0002-8629-4622"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pan Zhou","raw_affiliation_strings":["Tiangong Institute for Intelligent Computing, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tiangong Institute for Intelligent Computing, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100344294","display_name":"Wei Chen","orcid":"https://orcid.org/0000-0001-5090-9915"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Chen","raw_affiliation_strings":["Voice Interaction Technology Center, Sogou Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Voice Interaction Technology Center, Sogou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405572","display_name":"Jia Jia","orcid":"https://orcid.org/0000-0002-7336-4003"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Jia","raw_affiliation_strings":["Tiangong Institute for Intelligent Computing, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tiangong Institute for Intelligent Computing, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102298615"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":3.68682589,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.94094998,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1364","last_page":"1369"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8745326399803162},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.8095260858535767},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6878280639648438},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6347135901451111},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6137745380401611},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.585983157157898},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5328018665313721},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5099965333938599},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5094123482704163},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.48247030377388},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46373382210731506},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4322047531604767},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2221490740776062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8745326399803162},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.8095260858535767},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6878280639648438},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6347135901451111},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6137745380401611},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.585983157157898},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5328018665313721},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5099965333938599},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5094123482704163},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.48247030377388},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46373382210731506},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4322047531604767},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2221490740776062},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023133","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023133","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6899999976158142,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1489125746","https://openalex.org/W1514535095","https://openalex.org/W1526990717","https://openalex.org/W1586532344","https://openalex.org/W1810943226","https://openalex.org/W1828163288","https://openalex.org/W2102113734","https://openalex.org/W2125838338","https://openalex.org/W2127141656","https://openalex.org/W2133564696","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2160815625","https://openalex.org/W2293634267","https://openalex.org/W2327501763","https://openalex.org/W2962826786","https://openalex.org/W2962965405","https://openalex.org/W2963211739","https://openalex.org/W2963414781","https://openalex.org/W3147165232","https://openalex.org/W6623517193","https://openalex.org/W6630875275","https://openalex.org/W6675365184","https://openalex.org/W6696934422"],"related_works":["https://openalex.org/W3184026069","https://openalex.org/W2608712415","https://openalex.org/W4287266619","https://openalex.org/W3136989387","https://openalex.org/W3163300396","https://openalex.org/W1710082047","https://openalex.org/W2147500075","https://openalex.org/W2901023650","https://openalex.org/W2161306565","https://openalex.org/W2227343915"],"abstract_inverted_index":{"End-to-end":[0],"approaches":[1],"have":[2,32],"drawn":[3],"much":[4],"attention":[5],"recently":[6],"for":[7,48,58],"significantly":[8],"simplifying":[9],"the":[10,25,72,90,101,104,112,116,131,169],"construction":[11],"of":[12,24,82,103,115,120,133],"an":[13],"automatic":[14],"speech":[15,64],"recognition":[16,65],"(ASR)":[17],"system.":[18],"RNN":[19],"transducer":[20],"(RNN-T)":[21],"is":[22,36,46,86,154],"one":[23],"popular":[26],"end-":[27],"to-end":[28],"methods.":[29],"Previous":[30],"studies":[31],"shown":[33],"that":[34,96],"RNN-T":[35,57],"difficult":[37],"to":[38,70,88,126],"train":[39],"and":[40,68,106,138],"a":[41,49,59,79,128,159],"very":[42],"complex":[43],"training":[44,73,136],"process":[45,74,114],"needed":[47],"reasonable":[50],"performance.":[51,77,121,140],"In":[52],"this":[53],"paper,":[54],"we":[55,94,123,142],"explore":[56],"Chinese":[60],"large":[61],"vocabulary":[62],"continuous":[63],"(LVCSR)":[66],"task":[67],"aim":[69],"simplify":[71],"while":[75],"maintaining":[76],"First,":[78],"new":[80],"strategy":[81],"learning":[83],"rate":[84,147],"decay":[85],"proposed":[87],"accelerate":[89],"model":[91,139,166],"convergence.":[92],"Second,":[93],"find":[95,127],"adding":[97],"convolutional":[98],"layers":[99],"at":[100],"beginning":[102],"network":[105],"using":[107],"ordered":[108],"data":[109],"can":[110],"discard":[111],"pre-training":[113],"encoder":[117],"without":[118],"loss":[119],"Besides,":[122],"design":[124],"experiments":[125],"balance":[129],"among":[130],"usage":[132],"GPU":[134],"memory,":[135],"circle":[137],"Finally,":[141],"achieve":[143],"16.9%":[144],"character":[145],"error":[146],"(CER)":[148],"on":[149,168],"our":[150],"test":[151],"set,":[152],"which":[153],"2%":[155],"absolute":[156],"improvement":[157],"from":[158],"strong":[160],"BLSTM":[161],"CE":[162],"system":[163],"with":[164],"language":[165],"trained":[167],"same":[170],"text":[171],"corpus.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":10}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
