{"id":"https://openalex.org/W7139064685","doi":"https://doi.org/10.48550/arxiv.2603.16184","title":"Polyglot-Lion: Efficient Multilingual ASR for Singapore via Balanced Fine-Tuning of Qwen3-ASR","display_name":"Polyglot-Lion: Efficient Multilingual ASR for Singapore via Balanced Fine-Tuning of Qwen3-ASR","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7139064685","doi":"https://doi.org/10.48550/arxiv.2603.16184"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16184","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129772864","display_name":"Quy-Anh Dang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dang, Quy-Anh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5012291455","display_name":"Chris Ngo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ngo, Chris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9300000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9300000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.009700000286102295,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.008299999870359898,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6365000009536743},{"id":"https://openalex.org/keywords/fraction","display_name":"Fraction (chemistry)","score":0.5408999919891357},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5206000208854675},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5130000114440918},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.46380001306533813},{"id":"https://openalex.org/keywords/yield","display_name":"Yield (engineering)","score":0.4180999994277954},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.3677999973297119}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7664999961853027},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6365000009536743},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.5408999919891357},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5281000137329102},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5206000208854675},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5130000114440918},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5085999965667725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48969998955726624},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.46380001306533813},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3677999973297119},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.34310001134872437},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2648000121116638},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7861056327819824}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"Polyglot-Lion,":[2],"a":[3,42,92,99,105,149],"family":[4],"of":[5,18,50,85,102,139,151,154],"compact":[6],"multilingual":[7,146],"automatic":[8],"speech":[9,39],"recognition":[10],"(ASR)":[11],"models":[12,27,142],"tailored":[13],"for":[14,114],"the":[15,48,62,75,115,152],"linguistic":[16],"landscape":[17],"Singapore,":[19],"covering":[20],"English,":[21],"Mandarin,":[22],"Tamil,":[23],"and":[24,33,55],"Malay.":[25],"Our":[26],"are":[28],"obtained":[29],"by":[30],"fine-tuning":[31,138],"Qwen3-ASR-0.6B":[32],"Qwen3-ASR-1.7B":[34],"exclusively":[35],"on":[36,104],"publicly":[37],"available":[38],"corpora,":[40],"using":[41],"balanced":[43,137],"sampling":[44],"strategy":[45],"that":[46,61,135],"equalizes":[47],"number":[49],"training":[51,100],"utterances":[52],"per":[53],"language":[54],"deliberately":[56],"omits":[57],"language-tag":[58],"conditioning":[59],"so":[60],"model":[63,93],"learns":[64],"to":[65,112],"identify":[66],"languages":[67],"implicitly":[68],"from":[69],"audio.":[70],"On":[71],"12":[72],"benchmarks":[73],"spanning":[74],"four":[76],"target":[77],"languages,":[78],"Polyglot-Lion-1.7B":[79],"achieves":[80],"an":[81],"average":[82],"error":[83],"rate":[84],"14.85,":[86],"competitive":[87],"with":[88],"MERaLiON-2-10B-ASR":[89],"(14.32)":[90],"-":[91,96],"6x":[94],"larger":[95,155],"while":[97],"incurring":[98],"cost":[101,153],"\\$81":[103],"single":[106],"RTX":[107],"PRO":[108],"6000":[109],"GPU":[110],"compared":[111],"\\$18,862":[113],"128-GPU":[116],"baseline.":[117],"Inference":[118],"throughput":[119],"is":[120],"approximately":[121],"20x":[122],"faster":[123],"than":[124],"MERaLiON":[125],"at":[126,148],"0.10":[127],"s/sample":[128],"versus":[129],"2.02":[130],"s/sample.":[131],"These":[132],"results":[133],"demonstrate":[134],"linguistically":[136],"moderate-scale":[140],"pretrained":[141],"can":[143],"yield":[144],"deployment-ready":[145],"ASR":[147],"fraction":[150],"specialist":[156],"systems.":[157]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-20T00:00:00"}
