{"id":"https://openalex.org/W7127954719","doi":"https://doi.org/10.48550/arxiv.2602.04716","title":"Linguistically Informed Evaluation of Multilingual ASR for African Languages","display_name":"Linguistically Informed Evaluation of Multilingual ASR for African Languages","publication_year":2026,"publication_date":"2026-02-04","ids":{"openalex":"https://openalex.org/W7127954719","doi":"https://doi.org/10.48550/arxiv.2602.04716"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.04716","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125206821","display_name":"Fei-Yueh Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Fei-Yueh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125170395","display_name":"Lateef Adeleke","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adeleke, Lateef","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125125037","display_name":"C. M. Downey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Downey, C. M.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125206821"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.902899980545044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.902899980545044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.0203000009059906,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6614000201225281},{"id":"https://openalex.org/keywords/yoruba","display_name":"Yoruba","score":0.6484000086784363},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5759999752044678},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4702000021934509},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4512999951839447},{"id":"https://openalex.org/keywords/languages-of-africa","display_name":"Languages of Africa","score":0.4433000087738037},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4318000078201294},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.3580999970436096}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6873999834060669},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6614000201225281},{"id":"https://openalex.org/C2777568999","wikidata":"https://www.wikidata.org/wiki/Q34311","display_name":"Yoruba","level":2,"score":0.6484000086784363},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5759999752044678},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5250999927520752},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49129998683929443},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48899999260902405},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C108494575","wikidata":"https://www.wikidata.org/wiki/Q207930","display_name":"Languages of Africa","level":2,"score":0.4433000087738037},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.439300000667572},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4318000078201294},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C2778744346","wikidata":"https://www.wikidata.org/wiki/Q1152224","display_name":"Distinctive feature","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C4768521","wikidata":"https://www.wikidata.org/wiki/Q512366","display_name":"Phonotactics","level":3,"score":0.2858999967575073},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C541956065","wikidata":"https://www.wikidata.org/wiki/Q2250680","display_name":"Speech error","level":3,"score":0.2583000063896179},{"id":"https://openalex.org/C3019641298","wikidata":"https://www.wikidata.org/wiki/Q625642","display_name":"Word length","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.04716","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.04716","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.04716","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.04716","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8202992081642151,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Word":[0],"Error":[1,27],"Rate":[2,28],"(WER)":[3],"mischaracterizes":[4],"ASR":[5],"models'":[6,44],"performance":[7],"for":[8,129],"African":[9,56],"languages":[10,57],"by":[11,58,74,168],"combining":[12],"phonological,":[13],"tone,":[14],"and":[15,63,65,81,107,126,143],"other":[16],"linguistic":[17],"errors":[18,42,76],"into":[19],"a":[20,35,67,118,138],"single":[21],"lexical":[22],"error.":[23],"By":[24],"contrast,":[25],"Feature":[26],"(FER)":[29],"has":[30],"recently":[31],"attracted":[32],"attention":[33],"as":[34],"viable":[36],"metric":[37],"that":[38,73,96],"reveals":[39],"linguistically":[40],"meaningful":[41],"in":[43,121],"performance.":[45],"In":[46],"this":[47],"paper,":[48],"we":[49],"evaluate":[50],"three":[51],"speech":[52],"encoders":[53],"on":[54,77,100,115],"two":[55],"complementing":[59],"WER":[60,142],"with":[61,123,140],"CER,":[62],"FER,":[64],"add":[66],"tone-aware":[68],"extension":[69],"(TER).":[70],"We":[71],"show":[72,117],"computing":[75],"phonological":[78],"features,":[79,102],"FER":[80,150],"TER":[82],"reveal":[83,95],"linguistically-salient":[84],"error":[85,156],"patterns":[86],"even":[87],"when":[88],"word-level":[89],"accuracy":[90],"remains":[91],"low.":[92],"Our":[93],"results":[94],"models":[97],"perform":[98],"better":[99],"segmental":[101],"while":[103],"tones":[104],"(especially":[105],"mid":[106],"downstep)":[108],"remain":[109],"the":[110,147],"most":[111],"challenging":[112],"features.":[113],"Results":[114],"Yoruba":[116],"striking":[119],"differential":[120],"metrics,":[122],"WER=0.788,":[124],"CER=0.305,":[125],"FER=0.151.":[127],"Similarly":[128],"Uneme":[130],"(an":[131],"endangered":[132],"language":[133],"absent":[134],"from":[135],"pretraining":[136],"data)":[137],"model":[139,155],"near-total":[141],"0.461":[144],"CER":[145],"achieves":[146],"relatively":[148],"low":[149],"of":[151],"0.267.":[152],"This":[153],"indicates":[154],"is":[157,166],"often":[158],"attributable":[159],"to":[160],"individual":[161],"phonetic":[162],"feature":[163],"errors,":[164],"which":[165],"obscured":[167],"all-or-nothing":[169],"metrics":[170],"like":[171],"WER.":[172]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-07T00:00:00"}
