{"id":"https://openalex.org/W4402112262","doi":"https://doi.org/10.21437/interspeech.2024-552","title":"Quantifying the Role of Textual Predictability in Automatic Speech Recognition","display_name":"Quantifying the Role of Textual Predictability in Automatic Speech Recognition","publication_year":2024,"publication_date":"2024-09-01","ids":{"openalex":"https://openalex.org/W4402112262","doi":"https://doi.org/10.21437/interspeech.2024-552"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2024-552","is_oa":false,"landing_page_url":"http://dx.doi.org/10.21437/interspeech.2024-552","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.16537","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065402089","display_name":"Sean Robertson","orcid":"https://orcid.org/0000-0001-9680-3753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sean Robertson","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052428595","display_name":"Gerald Penn","orcid":"https://orcid.org/0000-0003-3553-8305"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gerald Penn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5069239428","display_name":"Ewan Dunbar","orcid":"https://orcid.org/0000-0001-9603-953X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ewan Dunbar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3055,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.64118354,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"4029","last_page":"4033"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/predictability","display_name":"Predictability","score":0.8462855815887451},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8141076564788818},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6742805242538452},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.630154550075531},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.623386025428772},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5495914816856384},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.5363617539405823},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5164223313331604},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.513957679271698},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5021355152130127},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.49006521701812744},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48740580677986145},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4676969051361084},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2818073034286499}],"concepts":[{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.8462855815887451},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8141076564788818},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6742805242538452},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.630154550075531},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.623386025428772},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5495914816856384},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.5363617539405823},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5164223313331604},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.513957679271698},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5021355152130127},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.49006521701812744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48740580677986145},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4676969051361084},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2818073034286499},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2024-552","is_oa":false,"landing_page_url":"http://dx.doi.org/10.21437/interspeech.2024-552","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2407.16537","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.16537","pdf_url":"https://arxiv.org/pdf/2407.16537","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.16537","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.16537","pdf_url":"https://arxiv.org/pdf/2407.16537","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2756221148","display_name":null,"funder_award_id":"RGPIN-2022-04431","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320322015","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087"},{"id":"https://openalex.org/F4320331257","display_name":"Alliance de recherche num\u00e9rique du Canada","ror":"https://ror.org/010r6td27"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402112262.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2121652828","https://openalex.org/W2126322296","https://openalex.org/W2163537793","https://openalex.org/W4386794561","https://openalex.org/W2916997151","https://openalex.org/W2781555308","https://openalex.org/W3021690593","https://openalex.org/W2125343999","https://openalex.org/W4200200210","https://openalex.org/W2161188302"],"abstract_inverted_index":{"A":[0],"long-standing":[1],"question":[2],"in":[3,89,137],"automatic":[4],"speech":[5],"recognition":[6],"research":[7],"is":[8],"how":[9,130],"to":[10,13,19,26,70,102],"attribute":[11],"errors":[12],"the":[14,21,58,64],"ability":[15,25],"of":[16,46,60,81,91,111,125],"a":[17,36,44,52,73,85],"model":[18,20,76],"acoustics,":[22],"versus":[23],"its":[24],"leverage":[27],"higher-order":[28],"context":[29,83],"(lexicon,":[30],"morphology,":[31],"syntax,":[32],"semantics).":[33],"We":[34,66,118,128],"validate":[35],"novel":[37],"approach":[38,132],"which":[39,56],"models":[40],"error":[41],"rates":[42],"as":[43],"function":[45],"relative":[47],"textual":[48,61,82],"predictability,":[49],"and":[50,98,139],"yields":[51],"single":[53],"number,":[54],"$k$,":[55],"measures":[57],"effect":[59],"predictability":[62],"on":[63,105,115],"recognizer.":[65],"use":[67,80,100],"this":[68,131],"method":[69],"demonstrate":[71,119],"that":[72,120],"Wav2Vec":[74],"2.0-based":[75],"makes":[77],"greater":[78],"stronger":[79],"than":[84],"hybrid":[86],"ASR":[87,113],"model,":[88,97],"spite":[90],"not":[92],"using":[93],"an":[94],"explicit":[95],"language":[96],"also":[99],"it":[101],"shed":[103],"light":[104],"recent":[106],"results":[107],"demonstrating":[108],"poor":[109],"performance":[110],"standard":[112],"systems":[114],"African-American":[116],"English.":[117],"these":[121],"mostly":[122],"represent":[123],"failures":[124],"acoustic--phonetic":[126],"modelling.":[127],"show":[129],"can":[133],"be":[134],"used":[135],"straightforwardly":[136],"diagnosing":[138],"improving":[140],"ASR.":[141]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2024-09-02T00:00:00"}
