{"id":"https://openalex.org/W4386907468","doi":"https://doi.org/10.48550/arxiv.2309.10299","title":"Using fine-tuning and min lookahead beam search to improve Whisper","display_name":"Using fine-tuning and min lookahead beam search to improve Whisper","publication_year":2023,"publication_date":"2023-09-19","ids":{"openalex":"https://openalex.org/W4386907468","doi":"https://doi.org/10.48550/arxiv.2309.10299"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.10299","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.10299","pdf_url":"https://arxiv.org/pdf/2309.10299","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.10299","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113329884","display_name":"Andrea Do","orcid":"https://orcid.org/0000-0002-6897-8553"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Do, Andrea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110331818","display_name":"O.R. Brown","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brown, Oscar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058550256","display_name":"Zhengjie Wang","orcid":"https://orcid.org/0000-0003-3179-3026"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhengjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017298289","display_name":"Nikhil Mathew","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mathew, Nikhil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078514490","display_name":"Zixin Liu","orcid":"https://orcid.org/0000-0001-8633-1182"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076675399","display_name":"Jawwad Ahmed","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed, Jawwad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102855201","display_name":"Cheng Yu","orcid":"https://orcid.org/0000-0002-4495-8793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Cheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5113329884"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.7526999711990356,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.7526999711990356,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7985564470291138},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6529116630554199},{"id":"https://openalex.org/keywords/beam-search","display_name":"Beam search","score":0.6430618762969971},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.5277907252311707},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4684135317802429},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.45849716663360596},{"id":"https://openalex.org/keywords/fine-tuning","display_name":"Fine-tuning","score":0.4573816955089569},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4460853636264801},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.419778048992157},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.41614192724227905},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3216237425804138},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11969643831253052},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11424088478088379},{"id":"https://openalex.org/keywords/particle-physics","display_name":"Particle physics","score":0.0843861997127533},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.0770229697227478}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7985564470291138},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6529116630554199},{"id":"https://openalex.org/C19889080","wikidata":"https://www.wikidata.org/wiki/Q2835852","display_name":"Beam search","level":3,"score":0.6430618762969971},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.5277907252311707},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4684135317802429},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.45849716663360596},{"id":"https://openalex.org/C157524613","wikidata":"https://www.wikidata.org/wiki/Q2828883","display_name":"Fine-tuning","level":2,"score":0.4573816955089569},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4460853636264801},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.419778048992157},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.41614192724227905},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3216237425804138},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11969643831253052},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11424088478088379},{"id":"https://openalex.org/C109214941","wikidata":"https://www.wikidata.org/wiki/Q18334","display_name":"Particle physics","level":1,"score":0.0843861997127533},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0770229697227478},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.10299","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.10299","pdf_url":"https://arxiv.org/pdf/2309.10299","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2309.10299","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.10299","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.10299","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.10299","pdf_url":"https://arxiv.org/pdf/2309.10299","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386907468.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386269615","https://openalex.org/W4390091683","https://openalex.org/W1994919150","https://openalex.org/W58921990","https://openalex.org/W2204575588","https://openalex.org/W2081821176","https://openalex.org/W3084994579","https://openalex.org/W3106196307","https://openalex.org/W4320350517","https://openalex.org/W2998661990"],"abstract_inverted_index":{"The":[0],"performance":[1],"of":[2,17,63,77,102],"Whisper":[3,41,114],"in":[4,27,33,65,132],"low-resource":[5,21],"languages":[6,103],"is":[7,73],"still":[8],"far":[9],"from":[10],"perfect.":[11],"In":[12],"addition":[13],"to":[14,60,80,105,112],"a":[15,74,100,120],"lack":[16],"training":[18],"data":[19,44],"on":[20,42,97],"languages,":[22],"we":[23,39],"identify":[24],"some":[25],"limitations":[26],"the":[28,52,68,92,126],"beam":[29,107,128],"search":[30,129],"algorithm":[31,130],"used":[32,131],"Whisper.":[34,133],"To":[35],"address":[36],"these":[37],"issues,":[38],"fine-tune":[40],"additional":[43],"and":[45,87],"propose":[46],"an":[47,61],"improved":[48],"decoding":[49,90],"algorithm.":[50],"On":[51],"Vietnamese":[53],"language,":[54],"fine-tuning":[55],"Whisper-Tiny":[56,70],"with":[57],"LoRA":[58],"leads":[59],"improvement":[62],"38.49":[64],"WER":[66,93],"over":[67,99],"zero-shot":[69],"setting":[71],"which":[72],"further":[75],"reduction":[76],"1.45":[78],"compared":[79,104],"full-parameter":[81],"fine-tuning.":[82],"Additionally,":[83],"by":[84,95],"using":[85],"Filter-Ends":[86],"Min":[88,123],"Lookahead":[89,124],"algorithms,":[91],"reduces":[94],"2.26":[96],"average":[98],"range":[101],"standard":[106,127],"search.":[108],"These":[109],"results":[110],"generalise":[111],"larger":[113],"model":[115],"sizes.":[116],"We":[117],"also":[118],"prove":[119],"theorem":[121],"that":[122],"outperforms":[125]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
