{"id":"https://openalex.org/W4391021131","doi":"https://doi.org/10.1109/asru57964.2023.10389787","title":"Discriminative Speech Recognition Rescoring With Pre-Trained Language Models","display_name":"Discriminative Speech Recognition Rescoring With Pre-Trained Language Models","publication_year":2023,"publication_date":"2023-12-16","ids":{"openalex":"https://openalex.org/W4391021131","doi":"https://doi.org/10.1109/asru57964.2023.10389787"},"language":"en","primary_location":{"id":"doi:10.1109/asru57964.2023.10389787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088032905","display_name":"Prashanth Gurunath Shivakumar","orcid":"https://orcid.org/0000-0003-1632-3309"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Prashanth Gurunath Shivakumar","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052467988","display_name":"Jari Kolehmainen","orcid":"https://orcid.org/0000-0002-4174-6277"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jari Kolehmainen","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022646825","display_name":"Yile Gu","orcid":"https://orcid.org/0000-0003-3662-5056"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yile Gu","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014054572","display_name":"Ankur Gandhe","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ankur Gandhe","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110230355","display_name":"Ariya Rastrow","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ariya Rastrow","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109934165","display_name":"Ivan Bulyko","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ivan Bulyko","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5088032905"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59611142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.9264967441558838},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.870606541633606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854560375213623},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6798677444458008},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6035864353179932},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5759795904159546},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5199404954910278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48591628670692444},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4688238799571991},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32566380500793457},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32193803787231445}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.9264967441558838},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.870606541633606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854560375213623},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6798677444458008},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6035864353179932},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5759795904159546},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5199404954910278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48591628670692444},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4688238799571991},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32566380500793457},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32193803787231445},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru57964.2023.10389787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2396464458","https://openalex.org/W2741951152","https://openalex.org/W2962824709","https://openalex.org/W2963747784","https://openalex.org/W2990138404","https://openalex.org/W3016188195","https://openalex.org/W3024308166","https://openalex.org/W3034775979","https://openalex.org/W3141464856","https://openalex.org/W4221160683","https://openalex.org/W4297841367","https://openalex.org/W4372269772","https://openalex.org/W4385822355","https://openalex.org/W4385823001","https://openalex.org/W4385823154","https://openalex.org/W4389518985","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6769613987","https://openalex.org/W6770528390","https://openalex.org/W6772383348","https://openalex.org/W6778883912","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W2113687551","https://openalex.org/W2112752961","https://openalex.org/W2162582511","https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W4221142855","https://openalex.org/W2050138804","https://openalex.org/W2129812225","https://openalex.org/W4290708361","https://openalex.org/W2523799048"],"abstract_inverted_index":{"Second":[0],"pass":[1],"rescoring":[2,27],"is":[3,121],"a":[4],"critical":[5],"component":[6],"of":[7,28,66],"competitive":[8],"automatic":[9],"speech":[10],"recognition":[11],"(ASR)":[12],"systems.":[13],"Large":[14],"language":[15],"models":[16],"have":[17],"demonstrated":[18],"their":[19],"ability":[20],"in":[21,85],"using":[22],"pre-trained":[23,55,80],"information":[24],"for":[25,54],"better":[26,122],"ASR":[29],"hypothesis.":[30],"Discriminative":[31],"training,":[32],"directly":[33],"optimizing":[34],"the":[35],"minimum":[36],"word-error-rate":[37],"(MWER)":[38],"criterion":[39],"typically":[40],"improves":[41],"rescoring.":[42],"In":[43],"this":[44],"study,":[45],"we":[46],"propose":[47,58],"and":[48,69,82],"explore":[49],"several":[50],"discriminative":[51,86,125],"fine-tuning":[52],"schemes":[53,96],"LMs.":[56],"We":[57,75],"two":[59],"architectures":[60],"based":[61,73],"on":[62,89],"different":[63],"pooling":[64,106],"strategies":[65],"output":[67],"embeddings":[68],"compare":[70],"with":[71,124],"probability":[72],"MWER.":[74],"conduct":[76],"detailed":[77],"comparisons":[78],"between":[79],"causal":[81],"bidirectional":[83],"LMs":[84],"settings.":[87],"Experiments":[88],"LibriSpeech":[90],"demonstrate":[91],"that":[92,119],"all":[93],"MWER":[94],"training":[95],"are":[97],"beneficial,":[98],"giving":[99],"additional":[100],"gains":[101],"upto":[102],"8.5%":[103],"WER.":[104],"Proposed":[105],"variants":[107],"achieve":[108],"lower":[109],"latency":[110],"while":[111],"retaining":[112],"most":[113],"improvements.":[114],"Finally,":[115],"our":[116],"study":[117],"concludes":[118],"bidirectionality":[120],"utilized":[123],"training.":[126]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
