{"id":"https://openalex.org/W3164673815","doi":"https://doi.org/10.21437/interspeech.2021-542","title":"Improving RNN-T ASR Accuracy Using Context Audio","display_name":"Improving RNN-T ASR Accuracy Using Context Audio","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3164673815","doi":"https://doi.org/10.21437/interspeech.2021-542","mag":"3164673815"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-542","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-542","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.10538","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015722753","display_name":"Andreas Schwarz","orcid":"https://orcid.org/0000-0001-5484-3976"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Andreas Schwarz","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064043969","display_name":"Ilya Sklyar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ilya Sklyar","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077727156","display_name":"Simon Wiesler","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Simon Wiesler","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015722753"],"corresponding_institution_ids":["https://openalex.org/I4210089985"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0629674,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1792","last_page":"1796"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8684594631195068},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.8082934021949768},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7342785000801086},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.677214503288269},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6639130711555481},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6570967435836792},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6054354906082153},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47449159622192383},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4453474283218384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4215025305747986}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8684594631195068},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.8082934021949768},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7342785000801086},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.677214503288269},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6639130711555481},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6570967435836792},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6054354906082153},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47449159622192383},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4453474283218384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4215025305747986},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2021-542","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-542","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.10538","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.10538","pdf_url":"https://arxiv.org/pdf/2011.10538","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3164673815","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2011.10538.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2011.10538","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.10538","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.10538","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.10538","pdf_url":"https://arxiv.org/pdf/2011.10538","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.699999988079071,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3164673815.pdf","grobid_xml":"https://content.openalex.org/works/W3164673815.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2064675550","https://openalex.org/W2507319753","https://openalex.org/W2513836842","https://openalex.org/W2749659773","https://openalex.org/W2891367150","https://openalex.org/W2912512634","https://openalex.org/W2936774411","https://openalex.org/W2963931415","https://openalex.org/W3008181812","https://openalex.org/W3015995734","https://openalex.org/W3016010032","https://openalex.org/W3092122846","https://openalex.org/W3095173472"],"related_works":["https://openalex.org/W3198413388","https://openalex.org/W3108266107","https://openalex.org/W3105804405","https://openalex.org/W3168872064","https://openalex.org/W3108184206","https://openalex.org/W2992448548","https://openalex.org/W2913566369","https://openalex.org/W2952010730","https://openalex.org/W3186546663","https://openalex.org/W3095773170","https://openalex.org/W3211228299","https://openalex.org/W3035299099","https://openalex.org/W3097573669","https://openalex.org/W2964182350","https://openalex.org/W2790326622","https://openalex.org/W2981532039","https://openalex.org/W3168285598","https://openalex.org/W3080248383","https://openalex.org/W2520176975","https://openalex.org/W2890351244"],"abstract_inverted_index":{"We":[0,43,77],"present":[1,94],"a":[2,30,67,72,119],"training":[3,52,84],"scheme":[4],"for":[5,71],"streaming":[6],"automatic":[7],"speech":[8,92],"recognition":[9],"(ASR)":[10],"based":[11,124],"on":[12,86],"recurrent":[13],"neural":[14],"network":[15,22,104],"transducers":[16],"(RNN-T)":[17],"which":[18,97],"allows":[19],"the":[20,39,46,79,82,103,116,140],"encoder":[21,126],"to":[23,25,57,127,139],"learn":[24,105],"exploit":[26,128],"context":[27,49],"audio":[28,50],"from":[29],"stream,":[31],"using":[32],"segmented":[33],"or":[34],"partially":[35],"labeled":[36],"sequences":[37],"of":[38,48,62,81,118],"stream":[40],"during":[41,51],"training.":[42],"show":[44],"that":[45,99],"use":[47],"and":[53,93,108],"inference":[54],"can":[55],"lead":[56],"word":[58],"error":[59],"rate":[60],"reductions":[61],"more":[63],"than":[64],"6%":[65],"in":[66],"realistic":[68],"production":[69],"setting":[70],"voice":[73],"assistant":[74],"ASR":[75,125],"system.":[76],"investigate":[78],"effect":[80],"proposed":[83],"approach":[85,101],"acoustically":[87],"challenging":[88],"data":[89,95],"containing":[90],"background":[91],"points":[96],"indicate":[98],"this":[100],"helps":[102],"both":[106],"speaker":[107],"environment":[109],"adaptation.":[110],"To":[111],"gain":[112],"further":[113],"insight":[114],"into":[115],"ability":[117],"long":[120],"short-term":[121],"memory":[122],"(LSTM)":[123],"long-term":[129],"context,":[130],"we":[131],"also":[132],"visualize":[133],"RNN-T":[134],"loss":[135],"gradients":[136],"with":[137],"respect":[138],"input.":[141]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
