{"id":"https://openalex.org/W3015918625","doi":"https://doi.org/10.1109/icassp40776.2020.9053872","title":"OOV Recovery with Efficient 2nd Pass Decoding and Open-vocabulary Word-level RNNLM Rescoring for Hybrid ASR","display_name":"OOV Recovery with Efficient 2nd Pass Decoding and Open-vocabulary Word-level RNNLM Rescoring for Hybrid ASR","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015918625","doi":"https://doi.org/10.1109/icassp40776.2020.9053872","mag":"3015918625"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088378499","display_name":"Xiaohui Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Xiaohui Zhang","raw_affiliation_strings":["Facebook AI, US"],"affiliations":[{"raw_affiliation_string":"Facebook AI, US","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084286453","display_name":"Daniel Povey","orcid":"https://orcid.org/0000-0002-0611-3634"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Povey","raw_affiliation_strings":["Center for Language and Speech Processing & Human Language Technology Center of Excellence, The Johns Hopkins University, Baltimore, MD, US"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing & Human Language Technology Center of Excellence, The Johns Hopkins University, Baltimore, MD, US","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014580424","display_name":"Sanjeev Khudanpur","orcid":"https://orcid.org/0000-0001-5976-0897"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanjeev Khudanpur","raw_affiliation_strings":["Center for Language and Speech Processing & Human Language Technology Center of Excellence, The Johns Hopkins University, Baltimore, MD, US"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing & Human Language Technology Center of Excellence, The Johns Hopkins University, Baltimore, MD, US","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088378499"],"corresponding_institution_ids":["https://openalex.org/I2252078561"],"apc_list":null,"apc_paid":null,"fwci":0.3977,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.67333793,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"6334","last_page":"6338"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8877228498458862},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7787787914276123},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.711143970489502},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.686593770980835},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6151232719421387},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5958559513092041},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49409058690071106},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.45893192291259766},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45500752329826355},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33556365966796875},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.23820790648460388},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11808064579963684},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.055972397327423096}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8877228498458862},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7787787914276123},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.711143970489502},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.686593770980835},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6151232719421387},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5958559513092041},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49409058690071106},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.45893192291259766},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45500752329826355},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33556365966796875},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.23820790648460388},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11808064579963684},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.055972397327423096},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W29042482","https://openalex.org/W58893626","https://openalex.org/W141101499","https://openalex.org/W143425341","https://openalex.org/W593944373","https://openalex.org/W1501139663","https://openalex.org/W1524333225","https://openalex.org/W1528716106","https://openalex.org/W1903206367","https://openalex.org/W2008349970","https://openalex.org/W2113540630","https://openalex.org/W2140281218","https://openalex.org/W2144087290","https://openalex.org/W2166850712","https://openalex.org/W2406494896","https://openalex.org/W2578686723","https://openalex.org/W2783520527","https://openalex.org/W2787663903","https://openalex.org/W2800971787","https://openalex.org/W2802422770","https://openalex.org/W2808939837","https://openalex.org/W2888867175","https://openalex.org/W2890705133","https://openalex.org/W2940180244","https://openalex.org/W2944255943","https://openalex.org/W2963347649","https://openalex.org/W2964107261","https://openalex.org/W3103005696","https://openalex.org/W4301749008","https://openalex.org/W6601176331","https://openalex.org/W6602415096","https://openalex.org/W6617627564","https://openalex.org/W6631362777","https://openalex.org/W6631681581","https://openalex.org/W6677095389","https://openalex.org/W6746497793","https://openalex.org/W6747860505","https://openalex.org/W6844943890"],"related_works":["https://openalex.org/W4298287631","https://openalex.org/W2953061907","https://openalex.org/W1847088711","https://openalex.org/W4225394202","https://openalex.org/W3036642985","https://openalex.org/W3032952384","https://openalex.org/W2964335273","https://openalex.org/W2160451571","https://openalex.org/W2495256954","https://openalex.org/W2259317772"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,53,94,141],"investigate":[4],"out-of-vocabulary":[5],"(OOV)":[6],"word":[7],"recovery":[8,62,82,132,146],"in":[9],"hybrid":[10,45],"automatic":[11],"speech":[12],"recognition":[13],"(ASR)":[14],"systems,":[15],"with":[16,49,63,159],"emphasis":[17],"on":[18,43,137],"dynamic":[19],"vocabulary":[20],"expansion":[21],"for":[22,57],"both":[23],"Weight":[24],"Finite":[25],"State":[26],"Transducer":[27],"(WFST)-based":[28],"decoding":[29,85,135,157,168],"and":[30,83,133,169],"word-level":[31,98,120],"RNNLM":[32,121,170],"rescoring.":[33],"We":[34],"first":[35,90],"describe":[36],"our":[37],"OOV":[38,61,72,81,131,145],"candidate":[39],"generation":[40],"method":[41],"based":[42,167],"a":[44,55,64,118,164],"lexical":[46],"model":[47,75,103],"(HLM)":[48],"phoneme-sequence":[50],"constraints.":[51],"Next,":[52],"introduce":[54],"framework":[56],"efficient":[58,153],"second":[59],"pass":[60,91],"dynamically":[65],"expanded":[66],"vocabulary,":[67],"showing":[68],"that,":[69],"by":[70],"calibrating":[71],"candidates\u2019":[73],"language":[74,102],"(LM)":[76],"scores,":[77],"it":[78,108,126],"significantly":[79],"improves":[80],"overall":[84,134],"performance":[86,136],"compared":[87],"to":[88,110],"HLM-based":[89],"decoding.":[92],"Finally":[93],"propose":[95],"an":[96,152],"open-vocabulary":[97],"recurrent":[99],"neural":[100],"network":[101],"(RNNLM)":[104],"re-scoring":[105],"framework,":[106,158],"making":[107],"possible":[109],"re-score":[111],"ASR":[112,139,156],"hypotheses":[113],"containing":[114],"recovered":[115],"OOVs,":[116],"using":[117],"single":[119],"ignorant":[122],"of":[123,151],"OOVs":[124],"when":[125],"was":[127],"trained.":[128],"By":[129],"evaluating":[130],"Spanish/English":[138],"\u2018tasks,":[140],"show":[142],"the":[143,149],"proposed":[144],"pipeline":[147],"has":[148],"potential":[150],"open-vocab":[154],"word-based":[155],"minimal":[160],"extra":[161],"computation":[162],"versus":[163],"standard":[165],"WFST":[166],"rescoring":[171],"pipeline.":[172]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
