{"id":"https://openalex.org/W4392902822","doi":"https://doi.org/10.1109/icassp48485.2024.10447116","title":"Improving Speech Recognition for African American English with Audio Classification","display_name":"Improving Speech Recognition for African American English with Audio Classification","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902822","doi":"https://doi.org/10.1109/icassp48485.2024.10447116"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447116","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047141122","display_name":"Shefali Garg","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shefali Garg","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069872293","display_name":"Zhouyuan Huo","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhouyuan Huo","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032798707","display_name":"Khe Chai Sim","orcid":"https://orcid.org/0000-0002-0866-2223"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khe Chai Sim","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042375515","display_name":"Suzan Schwartz","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suzan Schwartz","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114042018","display_name":"M. Chua","orcid":"https://orcid.org/0009-0009-2974-4509"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mason Chua","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048487072","display_name":"Al\u00ebna Aks\u00ebnova","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Al\u00ebna Aks\u00ebnova","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062473985","display_name":"Tsendsuren Munkhdalai","orcid":"https://orcid.org/0000-0002-8783-4993"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tsendsuren Munkhdalai","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009090788","display_name":"Levi King","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Levi King","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109582636","display_name":"Darryl Wright","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Darryl Wright","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007417210","display_name":"Zion Mengesha","orcid":"https://orcid.org/0000-0001-6587-7485"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zion Mengesha","raw_affiliation_strings":["Google LLC","Stanford University"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014088530","display_name":"Dongseong Hwang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongseong Hwang","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara Sainath","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110711540","display_name":"Fran\u00e7oise Beaufays","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fran\u00e7oise Beaufays","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048766619","display_name":"Pedro Moreno Mengibar","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pedro Moreno Mengibar","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5047141122"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.7301,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7311953,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"12356","last_page":"12360"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8346483707427979},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7087746858596802},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6512476205825806},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6188436150550842},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6107811331748962},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5252532362937927},{"id":"https://openalex.org/keywords/american-english","display_name":"American English","score":0.510499119758606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48837512731552124},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.43315309286117554},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3674167990684509},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.25081759691238403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8346483707427979},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7087746858596802},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6512476205825806},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6188436150550842},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6107811331748962},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5252532362937927},{"id":"https://openalex.org/C2777939226","wikidata":"https://www.wikidata.org/wiki/Q7976","display_name":"American English","level":2,"score":0.510499119758606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48837512731552124},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.43315309286117554},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3674167990684509},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.25081759691238403},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447116","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2121879602","https://openalex.org/W2981732023","https://openalex.org/W3008181812","https://openalex.org/W3012624518","https://openalex.org/W3015271175","https://openalex.org/W3016234571","https://openalex.org/W3097777922","https://openalex.org/W3160766462","https://openalex.org/W3215236363","https://openalex.org/W4220718380","https://openalex.org/W4225665589","https://openalex.org/W4240984273","https://openalex.org/W4280618630","https://openalex.org/W4296069263","https://openalex.org/W4296070390","https://openalex.org/W4320930577","https://openalex.org/W4375869211","https://openalex.org/W4386566708","https://openalex.org/W6746023985","https://openalex.org/W6755477022","https://openalex.org/W6771467084","https://openalex.org/W6810125720","https://openalex.org/W6810649735","https://openalex.org/W6810673746","https://openalex.org/W6838302303"],"related_works":["https://openalex.org/W3081187864","https://openalex.org/W4380605396","https://openalex.org/W2803306015","https://openalex.org/W3133352777","https://openalex.org/W151018310","https://openalex.org/W2784059283","https://openalex.org/W4319779560","https://openalex.org/W2008737763","https://openalex.org/W4385611764","https://openalex.org/W2519224033"],"abstract_inverted_index":{"Automatic":[0],"speech":[1,67],"recognition":[2],"(ASR)":[3],"systems":[4],"have":[5,9],"been":[6],"shown":[7],"to":[8,22,26,30,58,89,94],"large":[10,129],"quality":[11],"disparities":[12],"between":[13,154],"the":[14,60,113],"language":[15],"varieties":[16],"they":[17],"are":[18],"intended":[19],"or":[20,32,102],"expected":[21],"recognize.":[23],"One":[24],"way":[25,57],"mitigate":[27],"this":[28,40,142],"is":[29,100],"train":[31,90],"fine-tune":[33],"models":[34],"with":[35,116],"more":[36],"representative":[37],"datasets.":[38],"But":[39],"approach":[41],"can":[42,121],"be":[43],"hindered":[44],"by":[45],"limited":[46],"in-domain":[47],"data":[48,143],"for":[49,135],"training":[50],"and":[51,85,156],"evaluation.":[52],"We":[53,81],"propose":[54],"a":[55,63,70,123,128,146],"new":[56],"improve":[59],"robustness":[61],"of":[62,73,125,131],"US":[64],"English":[65,78,109],"short-form":[66,133],"recognizer":[68],"using":[69],"small":[71],"amount":[72],"out-of-domain":[74],"(long-form)":[75],"African":[76],"American":[77,108],"(AAE)":[79],"data.":[80],"use":[82],"CORAAL,":[83],"YouTube":[84],"Mozilla":[86],"Common":[87],"Voice":[88],"an":[91,98],"audio":[92],"classifier":[93,114],"approximately":[95],"output":[96,115],"whether":[97],"utterance":[99],"AAE":[101,155],"some":[103],"other":[104],"variety":[105],"including":[106],"Mainstream":[107],"(MAE).":[110],"By":[111],"combining":[112],"coarse":[117],"geographic":[118],"information,":[119],"we":[120],"select":[122],"subset":[124],"utterances":[126],"from":[127],"corpus":[130],"untranscribed":[132],"queries":[134],"semi-supervised":[136],"learning":[137],"at":[138],"scale.":[139],"Fine-tuning":[140],"on":[141],"results":[144],"in":[145],"38.5%":[147],"relative":[148],"word":[149],"error":[150],"rate":[151],"disparity":[152],"reduction":[153],"MAE":[157,160],"without":[158],"reducing":[159],"quality.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
