{"id":"https://openalex.org/W3154863085","doi":"https://doi.org/10.3390/info12040165","title":"A 2D Convolutional Gating Mechanism for Mandarin Streaming Speech Recognition","display_name":"A 2D Convolutional Gating Mechanism for Mandarin Streaming Speech Recognition","publication_year":2021,"publication_date":"2021-04-12","ids":{"openalex":"https://openalex.org/W3154863085","doi":"https://doi.org/10.3390/info12040165","mag":"3154863085"},"language":"en","primary_location":{"id":"doi:10.3390/info12040165","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info12040165","pdf_url":"https://www.mdpi.com/2078-2489/12/4/165/pdf","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/12/4/165/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102768876","display_name":"Xintong Wang","orcid":"https://orcid.org/0000-0002-3702-2380"},"institutions":[{"id":"https://openalex.org/I31683504","display_name":"Beijing Forestry University","ror":"https://ror.org/04xv2pc41","country_code":"CN","type":"education","lineage":["https://openalex.org/I1327237609","https://openalex.org/I31683504","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xintong Wang","raw_affiliation_strings":["College of Science, Beijing Forestry University, Beijing 100083, China"],"raw_orcid":"https://orcid.org/0000-0002-3702-2380","affiliations":[{"raw_affiliation_string":"College of Science, Beijing Forestry University, Beijing 100083, China","institution_ids":["https://openalex.org/I31683504"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038184903","display_name":"Chuangang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I31683504","display_name":"Beijing Forestry University","ror":"https://ror.org/04xv2pc41","country_code":"CN","type":"education","lineage":["https://openalex.org/I1327237609","https://openalex.org/I31683504","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chuangang Zhao","raw_affiliation_strings":["School of Information Science &amp; Technology, Beijing Forestry University, Beijing 100083, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Science &amp; Technology, Beijing Forestry University, Beijing 100083, China","institution_ids":["https://openalex.org/I31683504"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038184903"],"corresponding_institution_ids":["https://openalex.org/I31683504"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.2798,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62166414,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"12","issue":"4","first_page":"165","last_page":"165"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.8834927082061768},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8044172525405884},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6550846099853516},{"id":"https://openalex.org/keywords/gating","display_name":"Gating","score":0.5745917558670044},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5463290214538574},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4779803156852722},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.44550928473472595},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.42713114619255066},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.23610490560531616}],"concepts":[{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.8834927082061768},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8044172525405884},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6550846099853516},{"id":"https://openalex.org/C194544171","wikidata":"https://www.wikidata.org/wiki/Q21105679","display_name":"Gating","level":2,"score":0.5745917558670044},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5463290214538574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4779803156852722},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.44550928473472595},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.42713114619255066},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.23610490560531616},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C42407357","wikidata":"https://www.wikidata.org/wiki/Q521","display_name":"Physiology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/info12040165","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info12040165","pdf_url":"https://www.mdpi.com/2078-2489/12/4/165/pdf","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ed3d767f87a64212ab83b81ec7de033d","is_oa":true,"landing_page_url":"https://doaj.org/article/ed3d767f87a64212ab83b81ec7de033d","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 12, Iss 4, p 165 (2021)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/12/4/165/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/info12040165","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information; Volume 12; Issue 4; Pages: 165","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/info12040165","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info12040165","pdf_url":"https://www.mdpi.com/2078-2489/12/4/165/pdf","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3154863085.pdf","grobid_xml":"https://content.openalex.org/works/W3154863085.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2064675550","https://openalex.org/W2119631826","https://openalex.org/W2133564696","https://openalex.org/W2143612262","https://openalex.org/W2144499799","https://openalex.org/W2327501763","https://openalex.org/W2491408735","https://openalex.org/W2526425061","https://openalex.org/W2618530766","https://openalex.org/W2626778328","https://openalex.org/W2746192915","https://openalex.org/W2752387826","https://openalex.org/W2752782242","https://openalex.org/W2755682845","https://openalex.org/W2941814890","https://openalex.org/W2962760690","https://openalex.org/W2962780374","https://openalex.org/W2962835968","https://openalex.org/W2963242190","https://openalex.org/W2963414781","https://openalex.org/W2963420686","https://openalex.org/W2963911037","https://openalex.org/W2963970792","https://openalex.org/W2964308564","https://openalex.org/W2972818416","https://openalex.org/W2982413405","https://openalex.org/W3007328579","https://openalex.org/W3015583403","https://openalex.org/W3016010032","https://openalex.org/W3048704486","https://openalex.org/W3096686110","https://openalex.org/W3097777922","https://openalex.org/W3127804529","https://openalex.org/W3132811364","https://openalex.org/W6638444622","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2374317326","https://openalex.org/W2990005675","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W2163874654"],"abstract_inverted_index":{"Recent":[0],"research":[1],"shows":[2],"recurrent":[3],"neural":[4],"network-Transducer":[5],"(RNN-T)":[6],"architecture":[7],"has":[8,114],"become":[9],"a":[10,48,71,115],"mainstream":[11],"approach":[12],"for":[13],"streaming":[14,33],"speech":[15,34],"recognition.":[16,35],"In":[17],"this":[18],"work,":[19],"we":[20,46],"investigate":[21],"the":[22,26,30,38,44,53,58,65,80,93,97,108,123],"VGG2":[23],"network":[24],"as":[25],"input":[27,39],"layer":[28],"to":[29,43,60,75,84,87,107],"RNN-T":[31,94,110],"in":[32,64,79],"Specifically,":[36],"before":[37],"feature":[40],"is":[41,83],"passed":[42],"RNN-T,":[45],"introduce":[47],"gated-VGG2":[49,99],"block,":[50],"which":[51],"uses":[52],"first":[54],"two":[55],"layers":[56],"of":[57],"VGG16":[59],"extract":[61],"contextual":[62],"information":[63,78],"time":[66],"domain,":[67],"and":[68,112,118],"then":[69],"use":[70],"SEnet-style":[72],"gating":[73],"mechanism":[74],"control":[76],"what":[77],"channel":[81],"domain":[82],"be":[85],"propagated":[86],"RNN-T.":[88],"The":[89],"results":[90],"show":[91],"that":[92],"model":[95],"with":[96],"proposed":[98],"block":[100],"brings":[101],"significant":[102],"performance":[103],"improvement":[104],"when":[105],"compared":[106],"existing":[109],"model,":[111],"it":[113],"lower":[116],"latency":[117],"character":[119],"error":[120],"rate":[121],"than":[122],"Transformer-based":[124],"model.":[125]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
