{"id":"https://openalex.org/W2963031791","doi":"https://doi.org/10.1109/iscslp.2018.8706567","title":"Improving Gated Recurrent Unit Based Acoustic Modeling with Batch Normalization and Enlarged Context","display_name":"Improving Gated Recurrent Unit Based Acoustic Modeling with Batch Normalization and Enlarged Context","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2963031791","doi":"https://doi.org/10.1109/iscslp.2018.8706567","mag":"2963031791"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2018.8706567","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2018.8706567","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028750131","display_name":"Jie Li","orcid":"https://orcid.org/0000-0002-2053-4662"},"institutions":[{"id":"https://openalex.org/I2801745840","display_name":"Kwai Chung Hospital","ror":"https://ror.org/05kz7bw59","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I1294586568","https://openalex.org/I2801745840"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie Li","raw_affiliation_strings":["Kwai, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"Kwai, Beijing, P.R. China","institution_ids":["https://openalex.org/I2801745840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022676888","display_name":"Yahui Shan","orcid":"https://orcid.org/0000-0002-1510-2590"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yahui Shan","raw_affiliation_strings":["School of Information and Electronics, Beijing Institute of Technology, Beijing, P.R.China"],"affiliations":[{"raw_affiliation_string":"School of Information and Electronics, Beijing Institute of Technology, Beijing, P.R.China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100658019","display_name":"Xiaorui Wang","orcid":"https://orcid.org/0000-0001-9633-1418"},"institutions":[{"id":"https://openalex.org/I2801745840","display_name":"Kwai Chung Hospital","ror":"https://ror.org/05kz7bw59","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I1294586568","https://openalex.org/I2801745840"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaorui Wang","raw_affiliation_strings":["Kwai, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"Kwai, Beijing, P.R. China","institution_ids":["https://openalex.org/I2801745840"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101886099","display_name":"Yan Li","orcid":"https://orcid.org/0000-0003-3566-0992"},"institutions":[{"id":"https://openalex.org/I2801745840","display_name":"Kwai Chung Hospital","ror":"https://ror.org/05kz7bw59","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I1294586568","https://openalex.org/I2801745840"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Li","raw_affiliation_strings":["Kwai, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"Kwai, Beijing, P.R. China","institution_ids":["https://openalex.org/I2801745840"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5028750131"],"corresponding_institution_ids":["https://openalex.org/I2801745840"],"apc_list":null,"apc_paid":null,"fwci":0.3258,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70212593,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"1","issue":null,"first_page":"126","last_page":"130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7911309003829956},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6605207920074463},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6239127516746521},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5912877321243286},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5897913575172424},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.5498211979866028},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.47207164764404297},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.4576309323310852},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44856002926826477},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4331797659397125},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4217913746833801},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.31017830967903137},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2293613851070404},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.11350679397583008}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7911309003829956},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6605207920074463},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6239127516746521},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5912877321243286},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5897913575172424},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.5498211979866028},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.47207164764404297},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.4576309323310852},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44856002926826477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4331797659397125},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4217913746833801},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31017830967903137},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2293613851070404},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11350679397583008},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2018.8706567","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2018.8706567","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1499864241","https://openalex.org/W1566256432","https://openalex.org/W1836465849","https://openalex.org/W2005708641","https://openalex.org/W2064675550","https://openalex.org/W2079735306","https://openalex.org/W2100649405","https://openalex.org/W2116261113","https://openalex.org/W2117671523","https://openalex.org/W2131774270","https://openalex.org/W2136848157","https://openalex.org/W2147768505","https://openalex.org/W2171312815","https://openalex.org/W2173629880","https://openalex.org/W2208299922","https://openalex.org/W2284628133","https://openalex.org/W2329068866","https://openalex.org/W2402146185","https://openalex.org/W2407080277","https://openalex.org/W2512608784","https://openalex.org/W2514741789","https://openalex.org/W2517251061","https://openalex.org/W2631415506","https://openalex.org/W2696967604","https://openalex.org/W2729190387","https://openalex.org/W2794209590","https://openalex.org/W2804651231","https://openalex.org/W2949117887","https://openalex.org/W2962825331","https://openalex.org/W2962949994","https://openalex.org/W2962953733","https://openalex.org/W2963242190","https://openalex.org/W2963304263","https://openalex.org/W2963308316","https://openalex.org/W2964084166","https://openalex.org/W3104896896","https://openalex.org/W4302375066","https://openalex.org/W6629930100","https://openalex.org/W6638667902","https://openalex.org/W6640608466","https://openalex.org/W6651673773","https://openalex.org/W6684983028","https://openalex.org/W6688039610","https://openalex.org/W6701650085","https://openalex.org/W6712930963","https://openalex.org/W6713762819","https://openalex.org/W6739913424","https://openalex.org/W6740627434"],"related_works":["https://openalex.org/W2591697403","https://openalex.org/W2944728705","https://openalex.org/W2904022177","https://openalex.org/W2359348847","https://openalex.org/W3011538607","https://openalex.org/W4294432981","https://openalex.org/W4321441197","https://openalex.org/W3125011624","https://openalex.org/W3096664139","https://openalex.org/W4385890381"],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,59],"future":[3,44,62],"contextual":[4],"information":[5],"is":[6,38],"typically":[7],"shown":[8,55],"to":[9,41,56,79,116],"be":[10,57],"helpful":[11],"for":[12],"acoustic":[13],"modeling.":[14],"Recently,":[15],"we":[16,77],"proposed":[17],"a":[18,32,112,124],"RNN":[19],"model":[20,42,69,90,138],"called":[21],"minimal":[22],"gated":[23],"recurrent":[24],"unit":[25],"with":[26,49,66,82,111,131],"input":[27],"projection":[28],"(mGRUIP),":[29],"in":[30],"which":[31],"context":[33,50,63],"module":[34,51],"namely":[35],"temporal":[36],"convolution,":[37],"specifically":[39],"designed":[40],"the":[43,61,106,128],"context.":[45,91],"This":[46],"model,":[47],"mGRUIP":[48],"(mGRUIP-Ctx),":[52],"has":[53],"been":[54],"able":[58],"utilizing":[60],"effectively,":[64],"meanwhile":[65],"quite":[67],"low":[68],"latency":[70],"and":[71,88,101,135],"computation":[72],"cost.":[73],"In":[74],"this":[75],"paper,":[76],"continue":[78],"improve":[80],"mGRUIP-Ctx":[81,108],"two":[83,95],"revisions:":[84],"applying":[85],"BN":[86],"methods":[87],"enlarging":[89],"Experimental":[92],"results":[93],"on":[94,127],"Mandarin":[96],"ASR":[97],"tasks":[98],"(8400":[99],"hours":[100],"60K":[102],"hours)":[103],"show":[104],"that,":[105],"revised":[107],"outperform":[109],"LSTM":[110],"large":[113],"margin":[114],"(11%":[115],"38%).":[117],"It":[118],"even":[119],"performs":[120],"slightly":[121],"better":[122],"than":[123],"superior":[125],"BLSTM":[126],"8400h":[129],"task,":[130],"33M":[132],"less":[133],"parameters":[134],"just":[136],"290ms":[137],"latency.":[139]},"counts_by_year":[{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
