{"id":"https://openalex.org/W3104912442","doi":"https://doi.org/10.1109/slt48900.2021.9383606","title":"Alignment Restricted Streaming Recurrent Neural Network Transducer","display_name":"Alignment Restricted Streaming Recurrent Neural Network Transducer","publication_year":2021,"publication_date":"2021-01-19","ids":{"openalex":"https://openalex.org/W3104912442","doi":"https://doi.org/10.1109/slt48900.2021.9383606","mag":"3104912442"},"language":"en","primary_location":{"id":"doi:10.1109/slt48900.2021.9383606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.03072","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074237839","display_name":"Jay Mahadeokar","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Jay Mahadeokar","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047358828","display_name":"Yuan Shangguan","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yuan Shangguan","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103098852","display_name":"Duc Le","orcid":"https://orcid.org/0000-0001-9490-2563"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Duc Le","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048538280","display_name":"Gil Keren","orcid":"https://orcid.org/0000-0002-5153-3494"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Gil Keren","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341891","display_name":"Hang Su","orcid":"https://orcid.org/0000-0002-6877-6783"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Hang Su","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058126062","display_name":"Thong Le","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Thong Le","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087421101","display_name":"Ching-Feng Yeh","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ching-Feng Yeh","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047073253","display_name":"Christian Fuegen","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Christian Fuegen","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041313589","display_name":"Michael L. Seltzer","orcid":"https://orcid.org/0000-0003-3474-2451"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Michael L. Seltzer","raw_affiliation_strings":["Facebook AI","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5074237839"],"corresponding_institution_ids":["https://openalex.org/I2252078561"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01012792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"52","last_page":"59"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.9443762898445129},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8262556791305542},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6195375919342041},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6108341813087463},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5859236717224121},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5354872345924377},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5297830104827881},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.45124363899230957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3329215943813324},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3167579770088196},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1430058479309082},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11479905247688293}],"concepts":[{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.9443762898445129},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8262556791305542},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6195375919342041},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6108341813087463},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5859236717224121},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5354872345924377},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5297830104827881},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.45124363899230957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3329215943813324},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3167579770088196},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1430058479309082},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11479905247688293},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/slt48900.2021.9383606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.03072","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.03072","pdf_url":"https://arxiv.org/pdf/2011.03072","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3104912442","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2011.03072.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2011.03072","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.03072","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/vwy9-9j67","is_oa":true,"landing_page_url":"https://doi.org/10.17023/vwy9-9j67","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.03072","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.03072","pdf_url":"https://arxiv.org/pdf/2011.03072","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3104912442.pdf","grobid_xml":"https://content.openalex.org/works/W3104912442.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2291513470","https://openalex.org/W2407080277","https://openalex.org/W2514741789","https://openalex.org/W2625979394","https://openalex.org/W2746192915","https://openalex.org/W2766219058","https://openalex.org/W2933138175","https://openalex.org/W2935756939","https://openalex.org/W2936774411","https://openalex.org/W2952509486","https://openalex.org/W2962760690","https://openalex.org/W2963250244","https://openalex.org/W2963382687","https://openalex.org/W2963414781","https://openalex.org/W2964084166","https://openalex.org/W2970971581","https://openalex.org/W2975044525","https://openalex.org/W3007227084","https://openalex.org/W3007528493","https://openalex.org/W3008174054","https://openalex.org/W3008525923","https://openalex.org/W3008898571","https://openalex.org/W3015194534","https://openalex.org/W3015315932","https://openalex.org/W3015927303","https://openalex.org/W3016234571","https://openalex.org/W3028545098","https://openalex.org/W3042007685","https://openalex.org/W3094667432","https://openalex.org/W3096888553","https://openalex.org/W6638749077","https://openalex.org/W6713762819","https://openalex.org/W6766978945","https://openalex.org/W6768205276","https://openalex.org/W6780612146"],"related_works":["https://openalex.org/W3149509723","https://openalex.org/W3007227084","https://openalex.org/W2987019345","https://openalex.org/W3162444624","https://openalex.org/W3095783102","https://openalex.org/W3022631507","https://openalex.org/W2952276042","https://openalex.org/W3199016780","https://openalex.org/W2901023650","https://openalex.org/W2787376081","https://openalex.org/W3021984791","https://openalex.org/W2963414781","https://openalex.org/W3151287998","https://openalex.org/W3097961476","https://openalex.org/W2892090442","https://openalex.org/W3006088799","https://openalex.org/W3015686596","https://openalex.org/W2976556660","https://openalex.org/W2804651231","https://openalex.org/W1489125746"],"abstract_inverted_index":{"There":[0],"is":[1,24],"a":[2,27,43,76,124],"growing":[3],"interest":[4],"in":[5,9],"the":[6,37,79,97,102,120,129,132,137,151,165],"speech":[7,19],"community":[8],"developing":[10],"Recurrent":[11],"Neural":[12],"Network":[13],"Transducer":[14],"(RNN-T)":[15],"models":[16,46,144],"for":[17,59,169,178],"automatic":[18],"recognition":[20],"(ASR)":[21],"applications.":[22],"RNN-T":[23,45,80,87],"trained":[25],"with":[26,48,105],"loss":[28,81,98,122,167],"function":[29,82],"that":[30,119],"does":[31],"not":[32],"enforce":[33],"temporal":[34],"alignment":[35,93],"of":[36,62,162],"training":[38,185],"transcripts":[39],"and":[40,83,114,136,173,186],"audio.":[41],"As":[42],"result,":[44],"built":[47],"uni-directional":[49],"long":[50],"short":[51],"term":[52],"memory":[53],"(LSTM)":[54],"encoders":[55],"tend":[56],"to":[57,78,95,127],"wait":[58],"longer":[60],"spans":[61],"input":[63],"audio,":[64],"before":[65],"streaming":[66],"already":[67],"decoded":[68],"ASR":[69,152],"tokens.":[70],"In":[71],"this":[72],"work,":[73],"we":[74],"propose":[75],"modification":[77],"develop":[84],"Alignment":[85],"Restricted":[86],"(Ar-RNN-T)":[88],"models,":[89],"which":[90],"utilize":[91],"audio-text":[92],"in-formation":[94],"guide":[96],"computation.":[99],"We":[100,117],"compare":[101],"proposed":[103],"method":[104],"existing":[106],"works,":[107],"such":[108,149],"as":[109,150],"monotonic":[110],"RNN-T,":[111],"on":[112,188],"LibriSpeech":[113],"in-house":[115],"datasets.":[116],"show":[118],"Ar-RNN-T":[121,143,166],"provides":[123],"refined":[125],"control":[126],"navigate":[128],"trade-offs":[130],"between":[131],"token":[133,156],"emission":[134],"delays":[135],"Word":[138],"Error":[139],"Rate":[140],"(WER).":[141],"The":[142],"also":[145],"improve":[146],"downstream":[147],"applications":[148],"End-pointing":[153],"by":[154],"guaranteeing":[155],"emissions":[157],"within":[158],"any":[159],"given":[160],"range":[161],"latency.":[163],"Moreover,":[164],"allows":[168],"bigger":[170],"batch":[171],"sizes":[172],"4":[174],"times":[175],"higher":[176],"throughput":[177],"our":[179],"LSTM":[180],"model":[181],"architecture,":[182],"enabling":[183],"faster":[184],"convergence":[187],"GPUs.":[189]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
