{"id":"https://openalex.org/W7161292527","doi":"https://doi.org/10.48550/arxiv.2605.14427","title":"A Calculus-Based Framework for Determining Vocabulary Size in End-to-End ASR","display_name":"A Calculus-Based Framework for Determining Vocabulary Size in End-to-End ASR","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161292527","doi":"https://doi.org/10.48550/arxiv.2605.14427"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.14427","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14427","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.14427","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047383705","display_name":"Sunil Kumar Kopparapu","orcid":"https://orcid.org/0000-0002-0502-527X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kopparapu, Sunil Kumar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5047383705"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9434999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9434999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.009499999694526196,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.009100000374019146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.8202999830245972},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.5145000219345093},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.47130000591278076},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4296000003814697},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.37770000100135803}],"concepts":[{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.8202999830245972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7735999822616577},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5424000024795532},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.5145000219345093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4999000132083893},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.47130000591278076},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4255000054836273},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C170003942","wikidata":"https://www.wikidata.org/wiki/Q861066","display_name":"Display size","level":3,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.14427","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14427","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.14427","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14427","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7559071183204651,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,26,155],"hybrid":[1],"automatic":[2],"speech":[3],"recognition":[4],"(ASR)":[5],"systems,":[6],"the":[7,15,24,40,51,80,89,116,133,148,166,171,184,190,208,216,219,233],"vocabulary":[8,55,81,102,134,185,212,234],"size":[9,52,82,103,135,186,213,235],"is":[10,56,110],"unambiguous,":[11],"typically":[12],"determined":[13],"by":[14,163,197],"number":[16],"of":[17,53,173,194,211,218,224],"phones,":[18],"bi-phones,":[19],"or":[20,113],"tri-phones":[21],"present":[22],"in":[23,60,104,115,179,227],"language.":[25],"contrast,":[27],"end-to-end":[28,62,139,241],"ASR":[29,63,93,242],"systems":[30],"derive":[31],"their":[32,105],"vocabulary,":[33],"often":[34],"referred":[35],"to":[36,87,131,181,231],"as":[37,68,83,151],"tokens":[38],"from":[39],"text":[41],"corpus":[42,204],"used":[43],"for":[44,138,238],"training.":[45,94],"The":[46,221],"choice":[47,210],"and,":[48],"more":[49],"importantly,":[50],"this":[54,156,225],"a":[57,100,142,152,201],"critical":[58],"hyper-parameter":[59,86,214],"training":[61,106,167,239],"systems.":[64],"Tokenization":[65],"algorithms":[66],"such":[67],"Byte":[69],"Pair":[70],"Encoding":[71],"(BPE),":[72],"WordPiece,":[73],"and":[74,169,175,192,205],"Unigram":[75],"Language":[76],"Model":[77],"(ULM)":[78],"use":[79],"an":[84,129,229,240],"input":[85],"generate":[88],"sub-words":[90],"employed":[91],"during":[92],"Popular":[95],"toolkits":[96],"like":[97],"ESPNet":[98],"provide":[99],"fixed":[101],"recipes,":[107],"but":[108],"there":[109],"little":[111],"documentation":[112],"discussion":[114],"literature":[117],"regarding":[118],"how":[119],"these":[120],"values":[121],"are":[122],"determined.":[123],"Recent":[124],"work":[125],"[1]":[126],"has":[127],"formalized":[128],"approach":[130,196,230],"identify":[132,232],"best":[136,236],"suited":[137,237],"ASR,":[140],"introducing":[141],"cost":[143],"function":[144],"framework":[145],"that":[146,161,207],"treats":[147],"tokenization":[149],"process":[150],"black":[153],"box.":[154],"paper,":[157],"we":[158],"build":[159],"upon":[160],"foundation":[162],"curve":[164],"fitting":[165],"data":[168],"using":[170],"principle":[172],"first":[174],"second":[176],"derivative":[177],"tests":[178],"calculus":[180],"formally":[182],"estimate":[183],"hyper-parameter.":[187],"We":[188],"demonstrate":[189],"utility":[191],"usefulness":[193],"our":[195],"applying":[198],"it":[199],"on":[200],"standard":[202],"Librispeech":[203],"show":[206],"optimal":[209],"improves":[215],"performance":[217],"ASR.":[220],"main":[222],"contribution":[223],"paper":[226],"formalizing":[228],"system.":[243]},"counts_by_year":[],"updated_date":"2026-05-16T06:11:02.277879","created_date":"2026-05-16T00:00:00"}
