{"id":"https://openalex.org/W4210424357","doi":"https://doi.org/10.1109/asru51503.2021.9688247","title":"Word-Level Confidence Estimation for RNN Transducers","display_name":"Word-Level Confidence Estimation for RNN Transducers","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210424357","doi":"https://doi.org/10.1109/asru51503.2021.9688247"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688247","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036732709","display_name":"Mingqiu Wang","orcid":"https://orcid.org/0000-0001-7164-0054"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mingqiu Wang","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010878109","display_name":"Hagen Soltau","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hagen Soltau","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072051996","display_name":"Laurent El Shafey","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laurent El Shafey","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025419994","display_name":"Izhak Shafran","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Izhak Shafran","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036732709"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.5076,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.68062101,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1170","last_page":"1177"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8300951719284058},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6259957551956177},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5708031058311462},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5476261377334595},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5204327702522278},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5145804286003113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5134757161140442},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5062236189842224},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48424646258354187},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.46148839592933655},{"id":"https://openalex.org/keywords/confidence-interval","display_name":"Confidence interval","score":0.4556087255477905},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3847030997276306},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37839919328689575},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13792625069618225}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8300951719284058},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6259957551956177},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5708031058311462},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5476261377334595},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5204327702522278},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5145804286003113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5134757161140442},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5062236189842224},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48424646258354187},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.46148839592933655},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.4556087255477905},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3847030997276306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37839919328689575},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13792625069618225},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688247","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W55333121","https://openalex.org/W309335912","https://openalex.org/W1795658042","https://openalex.org/W1828163288","https://openalex.org/W2066243589","https://openalex.org/W2127141656","https://openalex.org/W2129334286","https://openalex.org/W2166810516","https://openalex.org/W2254249950","https://openalex.org/W2280141299","https://openalex.org/W2327501763","https://openalex.org/W2594610113","https://openalex.org/W2626967530","https://openalex.org/W2898630520","https://openalex.org/W2948194985","https://openalex.org/W2948210185","https://openalex.org/W2950170869","https://openalex.org/W2963238274","https://openalex.org/W2963351448","https://openalex.org/W2963505832","https://openalex.org/W3013333272","https://openalex.org/W3016010032","https://openalex.org/W3026533485","https://openalex.org/W3094288413","https://openalex.org/W3095838132","https://openalex.org/W3097375352","https://openalex.org/W3134787422","https://openalex.org/W3161324588","https://openalex.org/W3163169798","https://openalex.org/W3196435865","https://openalex.org/W3196548933","https://openalex.org/W3197451691","https://openalex.org/W6602268766","https://openalex.org/W6638749077","https://openalex.org/W6691692454","https://openalex.org/W6730042731","https://openalex.org/W6739651123","https://openalex.org/W6774870834"],"related_works":["https://openalex.org/W4298195702","https://openalex.org/W2160451571","https://openalex.org/W3093768914","https://openalex.org/W2495256954","https://openalex.org/W1566315437","https://openalex.org/W2259317772","https://openalex.org/W4378498597","https://openalex.org/W2945402993","https://openalex.org/W3101140821","https://openalex.org/W4287816966"],"abstract_inverted_index":{"Confidence":[0],"estimate":[1,22],"is":[2,141],"an":[3],"often":[4],"requested":[5],"feature":[6],"in":[7,34,180],"applications":[8,172],"such":[9],"as":[10],"medical":[11,28],"transcription":[12],"where":[13],"errors":[14,33],"can":[15],"impact":[16],"patient":[17],"care":[18],"and":[19,80,84,91,100,134,158,173,192],"the":[20,68,77,97,123,164,175,184],"confidence":[21,44],"could":[23],"be":[24],"used":[25],"to":[26,30,60,169],"alert":[27],"professionals":[29],"verify":[31],"potential":[32],"recognition.":[35],"In":[36],"this":[37],"paper,":[38],"we":[39,120],"present":[40],"a":[41,82,126],"lightweight":[42],"neural":[43],"model":[45,65,124],"tailored":[46],"for":[47,87,177,187],"Automatic":[48],"Speech":[49],"Recognition":[50],"(ASR)":[51],"system":[52],"with":[53,72],"Recurrent":[54],"Neural":[55],"Network":[56],"Transducers":[57],"(RNN-T).":[58],"Compared":[59],"other":[61],"existing":[62],"approaches,":[63],"our":[64],"utilizes:":[66],"(a)":[67],"time":[69],"information":[70],"associated":[71],"recognized":[73],"words,":[74],"which":[75],"reduces":[76],"computational":[78],"complexity,":[79],"(b)":[81],"simple":[83],"elegant":[85],"trick":[86],"mapping":[88,95],"between":[89,107],"sub-word":[90],"word":[92],"sequences.":[93],"The":[94],"addresses":[96],"non-unique":[98],"tokenization":[99],"token":[101],"deletion":[102],"problems":[103],"while":[104],"amplifying":[105],"differences":[106],"confusable":[108],"words.":[109],"Through":[110],"extensive":[111],"empirical":[112],"evaluations":[113],"on":[114],"two":[115],"different":[116,144],"long-form":[117],"test":[118],"sets,":[119],"demonstrate":[121],"that":[122],"achieves":[125],"performance":[127],"of":[128,166],"0.4":[129],"Normalized":[130],"Cross":[131],"Entropy":[132],"(NCE)":[133],"0.05":[135],"Expected":[136],"Calibration":[137],"Error":[138],"(ECE).":[139],"It":[140],"robust":[142],"across":[143],"ASR":[145],"configurations,":[146],"including":[147],"target":[148],"types":[149],"(graphemes":[150],"vs.":[151,156],"morphemes),":[152],"traffic":[153],"conditions":[154],"(streaming":[155],"non-streaming),":[157],"encoder":[159],"types.":[160],"We":[161],"further":[162,178],"discuss":[163],"importance":[165],"evaluation":[167],"metrics":[168],"reflect":[170],"practical":[171],"highlight":[174],"need":[176],"work":[179],"improving":[181],"Area":[182],"Under":[183],"Curve":[185],"(AUC)":[186],"Negative":[188,194],"Precision":[189],"Rate":[190,195],"(NPV)":[191],"True":[193],"(TNR).":[196]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
