{"id":"https://openalex.org/W4225707368","doi":"https://doi.org/10.1109/asru51503.2021.9688058","title":"Latency-Controlled Neural Architecture Search for Streaming Speech Recognition","display_name":"Latency-Controlled Neural Architecture Search for Streaming Speech Recognition","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4225707368","doi":"https://doi.org/10.1109/asru51503.2021.9688058"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688058","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074469791","display_name":"Liqiang He","orcid":"https://orcid.org/0000-0001-9179-6398"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang He","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018718403","display_name":"Shulin Feng","orcid":"https://orcid.org/0000-0002-2437-6227"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin Feng","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062034286","display_name":"Dan Su","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Su","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab,Bellevue,WA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Bellevue,WA,USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.24528302,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"62","last_page":"67"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.8083196878433228},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7959514260292053},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.6366933584213257},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.611020028591156},{"id":"https://openalex.org/keywords/time-delay-neural-network","display_name":"Time delay neural network","score":0.4213476777076721},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3731231093406677},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34283921122550964},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0895342230796814}],"concepts":[{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.8083196878433228},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7959514260292053},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.6366933584213257},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.611020028591156},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.4213476777076721},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3731231093406677},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34283921122550964},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0895342230796814},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688058","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1600744878","https://openalex.org/W2193413348","https://openalex.org/W2405883473","https://openalex.org/W2407080277","https://openalex.org/W2519091744","https://openalex.org/W2889048668","https://openalex.org/W2951104886","https://openalex.org/W2953318193","https://openalex.org/W2963242190","https://openalex.org/W2964081807","https://openalex.org/W2981748264","https://openalex.org/W2997326985","https://openalex.org/W3000557999","https://openalex.org/W3015492205","https://openalex.org/W3016153040","https://openalex.org/W3034411059","https://openalex.org/W3043474396","https://openalex.org/W3080625606","https://openalex.org/W3092169262","https://openalex.org/W3095175338","https://openalex.org/W3096025930","https://openalex.org/W3096888553","https://openalex.org/W3160521245","https://openalex.org/W3160919572","https://openalex.org/W3161873870","https://openalex.org/W3162465594","https://openalex.org/W3206752670","https://openalex.org/W6687566353","https://openalex.org/W6752515464","https://openalex.org/W6754473786","https://openalex.org/W6772281271","https://openalex.org/W6772812598","https://openalex.org/W6777615951","https://openalex.org/W6781029604","https://openalex.org/W6781752936","https://openalex.org/W6782478220","https://openalex.org/W6782849051","https://openalex.org/W6782926259"],"related_works":["https://openalex.org/W1584270863","https://openalex.org/W2603525251","https://openalex.org/W2085961337","https://openalex.org/W3113777316","https://openalex.org/W2386241395","https://openalex.org/W4246541945","https://openalex.org/W2357447513","https://openalex.org/W2107201395","https://openalex.org/W2381790306","https://openalex.org/W4241378172"],"abstract_inverted_index":{"Neural":[0],"architecture":[1,72],"search":[2],"(NAS)":[3],"has":[4,9],"attracted":[5],"much":[6],"attention":[7],"and":[8,26,97,109],"been":[10],"explored":[11],"for":[12,31],"automatic":[13],"speech":[14],"recognition":[15],"(ASR).":[16],"In":[17],"this":[18],"work,":[19],"we":[20],"focus":[21],"on":[22,36,82,129,142],"streaming":[23],"ASR":[24],"scenarios":[25],"propose":[27],"the":[28,37,50,54,70,83,87,107,116,120,130,138],"latency-controlled":[29],"NAS":[30],"acoustic":[32],"modeling.":[33],"First,":[34],"based":[35],"vanilla":[38,108],"neural":[39,85,88],"architecture,":[40,86],"normal":[41],"cells":[42,47],"are":[43],"altered":[44],"to":[45,48,68],"causal":[46],"control":[49],"total":[51],"latency":[52,93,100,118],"of":[53,94,101],"architecture.":[55],"Second,":[56],"a":[57,62,91,98,143],"revised":[58,110],"operation":[59,111],"space":[60,112],"with":[61,73,90,137],"smaller":[63],"receptive":[64],"field":[65],"is":[66],"proposed":[67,84],"generate":[69],"final":[71],"low":[74,99,117],"latency.":[75],"Extensive":[76],"experiments":[77],"show":[78],"that:":[79],"1)":[80],"Based":[81],"networks":[89],"medium":[92],"550ms":[95],"(millisecond)":[96],"190ms":[102],"can":[103,123],"be":[104],"learned":[105],"in":[106],"respectively.":[113],"2)":[114],"For":[115],"setting,":[119],"evaluation":[121],"network":[122],"achieve":[124],"more":[125],"than":[126],"19%":[127],"(average":[128],"four":[131],"test":[132],"sets)":[133],"relative":[134],"improvements":[135],"compared":[136],"hybrid":[139],"CLDNN":[140],"baseline,":[141],"10k-hour":[144],"large-scale":[145],"dataset.":[146]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
