{"id":"https://openalex.org/W3008174054","doi":"https://doi.org/10.1109/asru46091.2019.9003822","title":"Monotonic Recurrent Neural Network Transducer and Decoding Strategies","display_name":"Monotonic Recurrent Neural Network Transducer and Decoding Strategies","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008174054","doi":"https://doi.org/10.1109/asru46091.2019.9003822","mag":"3008174054"},"language":"en","primary_location":{"id":"doi:10.1109/asru46091.2019.9003822","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003822","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101403998","display_name":"Anshuman Tripathi","orcid":"https://orcid.org/0000-0002-4902-3719"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anshuman Tripathi","raw_affiliation_strings":["Google Inc., USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086894713","display_name":"Lu Han","orcid":"https://orcid.org/0000-0002-1120-3220"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Lu","raw_affiliation_strings":["Google Inc., USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108537573","display_name":"Ha\u015fim Sak","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hasim Sak","raw_affiliation_strings":["Google Inc., USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010878109","display_name":"Hagen Soltau","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hagen Soltau","raw_affiliation_strings":["Google Inc., USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101403998"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":3.901,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.95025676,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"944","last_page":"948"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monotonic-function","display_name":"Monotonic function","score":0.8458791971206665},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.8374890685081482},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7193093299865723},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.665539026260376},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5945050716400146},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5910049080848694},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.46002665162086487},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.44460102915763855},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.43368515372276306},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.42707398533821106},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3522588014602661},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3455187976360321},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18752771615982056},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.08693951368331909}],"concepts":[{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.8458791971206665},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.8374890685081482},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7193093299865723},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.665539026260376},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5945050716400146},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5910049080848694},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.46002665162086487},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.44460102915763855},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.43368515372276306},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.42707398533821106},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3522588014602661},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3455187976360321},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18752771615982056},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.08693951368331909},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru46091.2019.9003822","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003822","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W309335912","https://openalex.org/W1522301498","https://openalex.org/W1828163288","https://openalex.org/W2064675550","https://openalex.org/W2127141656","https://openalex.org/W2293634267","https://openalex.org/W2327501763","https://openalex.org/W2746192915","https://openalex.org/W2773723600","https://openalex.org/W2962826786","https://openalex.org/W2963590452","https://openalex.org/W2963747784","https://openalex.org/W2964121744","https://openalex.org/W6696934422"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W2157978810","https://openalex.org/W2597809628","https://openalex.org/W3046370962"],"abstract_inverted_index":{"Recurrent":[0],"Neural":[1],"Network":[2],"Transducer":[3],"(RNNT)":[4],"is":[5,72,101,136,162],"an":[6],"end-to-end":[7],"model":[8,60],"which":[9],"transduces":[10],"discrete":[11],"input":[12,106],"sequences":[13,16],"to":[14,116,138],"output":[15,99],"by":[17],"learning":[18],"alignments":[19,56,120],"between":[20,34,121],"the":[21,41,59,77,91,95,98,105,122],"sequences.":[22,123],"In":[23],"speech":[24,130],"recognition":[25,131],"tasks":[26],"we":[27,85],"generally":[28],"have":[29],"a":[30,62,67,87],"strictly":[31,118],"monotonic":[32,88,119,134,151],"alignment":[33],"time":[35,69,79],"frames":[36],"and":[37,145,152,166,185],"label":[38,179],"sequence.":[39],"However,":[40],"standard":[42,139,153],"RNNT":[43,92,135,154],"loss":[44,109],"does":[45],"not":[46,102],"enforce":[47],"this":[48,108],"constraint.":[49],"This":[50],"can":[51,110],"cause":[52],"some":[53],"anomalies":[54],"in":[55,164,187],"such":[57],"as":[58],"outputting":[61],"sequence":[63,100],"of":[64,90,175],"labels":[65],"at":[66],"single":[68],"frame.":[70],"There":[71],"also":[73,142,172],"no":[74],"bound":[75],"on":[76],"decoding":[78,147,188],"steps.":[80],"To":[81],"address":[82],"these":[83],"problems,":[84],"introduce":[86],"version":[89],"loss.":[93],"Under":[94],"assumption":[96],"that":[97,129,159],"longer":[103],"than":[104],"sequence,":[107],"be":[111],"used":[112],"with":[113],"forward-backward":[114],"algorithm":[115],"learn":[117],"We":[124,141],"present":[125],"experimental":[126],"studies":[127],"showing":[128],"accuracy":[132],"for":[133,149],"equivalent":[137],"RNNT.":[140],"explore":[143],"best-first":[144],"breadth-first":[146,160],"strategies":[148],"both":[150],"models.":[155],"Our":[156],"experiments":[157],"show":[158],"search":[161,178],"effective":[163],"exploring":[165],"combining":[167],"alternative":[168],"alignments.":[169],"Additionally,":[170],"it":[171],"allows":[173],"batching":[174],"hypotheses":[176],"during":[177],"expansion,":[180],"allowing":[181],"better":[182],"resource":[183],"utilization,":[184],"resulting":[186],"speedup.":[189]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":6}],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-10-10T00:00:00"}
