{"id":"https://openalex.org/W2622301280","doi":"https://doi.org/10.21437/interspeech.2017-639","title":"Optimizing Expected Word Error Rate via Sampling for Speech Recognition","display_name":"Optimizing Expected Word Error Rate via Sampling for Speech Recognition","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2622301280","doi":"https://doi.org/10.21437/interspeech.2017-639","mag":"2622301280"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-639","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1706.02776","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080404214","display_name":"Matt Shannon","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Matt Shannon","raw_affiliation_strings":["Google (United States), Mountain View, United States"],"affiliations":[{"raw_affiliation_string":"Google (United States), Mountain View, United States","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5080404214"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":4.12523348,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.94183308,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3537","last_page":"3541"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.694826066493988},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.681769609451294},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5737714767456055},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5354123115539551},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.47450894117355347},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4661594331264496},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.45929402112960815},{"id":"https://openalex.org/keywords/gibbs-sampling","display_name":"Gibbs sampling","score":0.4453396797180176},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.44091400504112244},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4405997097492218},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.4238879084587097},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3952524960041046},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33543652296066284},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.3011685609817505},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2597159743309021},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.24957332015037537},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.17003116011619568},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.168556809425354}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.694826066493988},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.681769609451294},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5737714767456055},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5354123115539551},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.47450894117355347},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4661594331264496},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.45929402112960815},{"id":"https://openalex.org/C158424031","wikidata":"https://www.wikidata.org/wiki/Q1191905","display_name":"Gibbs sampling","level":3,"score":0.4453396797180176},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.44091400504112244},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4405997097492218},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.4238879084587097},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3952524960041046},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33543652296066284},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3011685609817505},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2597159743309021},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.24957332015037537},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.17003116011619568},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.168556809425354},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2017-639","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1706.02776","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1706.02776","pdf_url":"https://arxiv.org/pdf/1706.02776","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2622301280","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1706.02776.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1706.02776","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1706.02776","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1706.02776","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1706.02776","pdf_url":"https://arxiv.org/pdf/1706.02776","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2622301280.pdf","grobid_xml":"https://content.openalex.org/works/W2622301280.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W10656737","https://openalex.org/W14913773","https://openalex.org/W34922426","https://openalex.org/W38527073","https://openalex.org/W1533416326","https://openalex.org/W2053567709","https://openalex.org/W2102113734","https://openalex.org/W2103980808","https://openalex.org/W2106359072","https://openalex.org/W2119717200","https://openalex.org/W2131342762","https://openalex.org/W2137104525","https://openalex.org/W2147768505","https://openalex.org/W2150907703","https://openalex.org/W2154887136","https://openalex.org/W2155027007","https://openalex.org/W2158339352","https://openalex.org/W2158510249","https://openalex.org/W2398850919","https://openalex.org/W2514741789","https://openalex.org/W2953384591","https://openalex.org/W2963248296","https://openalex.org/W2963920996"],"related_works":["https://openalex.org/W2962765220","https://openalex.org/W2964308564","https://openalex.org/W2127141656","https://openalex.org/W2064675550","https://openalex.org/W1828163288","https://openalex.org/W2327501763","https://openalex.org/W2102113734","https://openalex.org/W2143612262","https://openalex.org/W2525778437","https://openalex.org/W1987238397","https://openalex.org/W1524333225","https://openalex.org/W2773723600","https://openalex.org/W2889871534","https://openalex.org/W2963920996","https://openalex.org/W2962826786","https://openalex.org/W2626778328","https://openalex.org/W2131774270","https://openalex.org/W2131342762","https://openalex.org/W1877570817","https://openalex.org/W2963727906"],"abstract_inverted_index":{"State-level":[0],"minimum":[1],"Bayes":[2],"risk":[3],"(sMBR)":[4],"training":[5,14,51,154],"has":[6,21],"become":[7],"the":[8,26,53,60,73,83,97,112,119,129],"de":[9],"facto":[10],"standard":[11],"for":[12],"sequence-level":[13],"of":[15,56,111,128],"speech":[16],"recognition":[17,170],"acoustic":[18,64,152],"models.":[19],"It":[20,67],"an":[22,134],"elegant":[23],"formulation":[24],"using":[25,42,141],"expectation":[27,84],"semiring,":[28,85],"and":[29,62,86,136],"gives":[30,155],"large":[31],"improvements":[32],"in":[33,159],"word":[34],"error":[35],"rate":[36],"(WER)":[37],"over":[38,161],"models":[39],"trained":[40],"solely":[41],"cross-entropy":[43],"(CE)":[44],"or":[45],"connectionist":[46],"temporal":[47],"classification":[48],"(CTC).":[49],"sMBR":[50,124,164],"optimizes":[52],"expected":[54,74,92,113,130],"number":[55],"frames":[57],"at":[58],"which":[59],"reference":[61],"hypothesized":[63],"states":[65],"differ.":[66],"may":[68,138],"be":[69,139],"preferable":[70],"to":[71,108],"optimize":[72],"WER,":[75],"but":[76],"WER":[77,93,114,131,150,160],"does":[78],"not":[79],"interact":[80],"well":[81],"with":[82],"previous":[87],"approaches":[88],"based":[89],"on":[90,166],"computing":[91],"exactly":[94],"involve":[95],"expanding":[96],"lattices":[98,120],"used":[99,121],"during":[100,122,151],"training.":[101,125],"In":[102],"this":[103],"paper":[104],"we":[105],"show":[106,146],"how":[107],"perform":[109],"optimization":[110],"by":[115],"sampling":[116],"paths":[117],"from":[118],"conventional":[123],"The":[126],"gradient":[127],"is":[132],"itself":[133],"expectation,":[135],"so":[137],"approximated":[140],"Monte":[142],"Carlo":[143],"sampling.":[144],"We":[145],"experimentally":[147],"that":[148],"optimizing":[149],"model":[153],"5%":[156],"relative":[157],"improvement":[158],"a":[162,167],"well-tuned":[163],"baseline":[165],"2-channel":[168],"query":[169],"task":[171],"(Google":[172],"Home).":[173]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
