{"id":"https://openalex.org/W3110524561","doi":"https://doi.org/10.21437/interspeech.2021-1390","title":"Adapt-and-Adjust: Overcoming the Long-Tail Problem of Multilingual Speech Recognition","display_name":"Adapt-and-Adjust: Overcoming the Long-Tail Problem of Multilingual Speech Recognition","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3110524561","doi":"https://doi.org/10.21437/interspeech.2021-1390","mag":"3110524561"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-1390","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.01687","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085516032","display_name":"Genta Indra Winata","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155268","display_name":"Salesforce (United States)","ror":"https://ror.org/057315g56","country_code":"US","type":"company","lineage":["https://openalex.org/I4210155268"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Genta Indra Winata","raw_affiliation_strings":["Salesforce#TAB#"],"affiliations":[{"raw_affiliation_string":"Salesforce#TAB#","institution_ids":["https://openalex.org/I4210155268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085034441","display_name":"Guangsen Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155268","display_name":"Salesforce (United States)","ror":"https://ror.org/057315g56","country_code":"US","type":"company","lineage":["https://openalex.org/I4210155268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangsen Wang","raw_affiliation_strings":["Salesforce#TAB#"],"affiliations":[{"raw_affiliation_string":"Salesforce#TAB#","institution_ids":["https://openalex.org/I4210155268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032046813","display_name":"Caiming Xiong","orcid":"https://orcid.org/0000-0003-0349-8628"},"institutions":[{"id":"https://openalex.org/I4210155268","display_name":"Salesforce (United States)","ror":"https://ror.org/057315g56","country_code":"US","type":"company","lineage":["https://openalex.org/I4210155268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Caiming Xiong","raw_affiliation_strings":["Salesforce#TAB#"],"affiliations":[{"raw_affiliation_string":"Salesforce#TAB#","institution_ids":["https://openalex.org/I4210155268"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074834854","display_name":"Steven C. H. Hoi","orcid":"https://orcid.org/0000-0002-4584-3453"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Steven Hoi","raw_affiliation_strings":[", Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":", Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5085516032"],"corresponding_institution_ids":["https://openalex.org/I4210155268"],"apc_list":null,"apc_paid":null,"fwci":1.5378,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.85460068,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8084163665771484},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.729363739490509},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6358572840690613},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5844307541847229},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5218995213508606},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4910776615142822},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4741002023220062},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44351521134376526},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.387445867061615},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34865641593933105},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.14497888088226318}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8084163665771484},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.729363739490509},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6358572840690613},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5844307541847229},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5218995213508606},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4910776615142822},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4741002023220062},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44351521134376526},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.387445867061615},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34865641593933105},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.14497888088226318},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.21437/interspeech.2021-1390","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2012.01687","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.01687","pdf_url":"https://arxiv.org/pdf/2012.01687","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3110524561","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2012.01687.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-115921","is_oa":false,"landing_page_url":"http://lbdiscover.ust.hk/uresolver?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/HKUST:SPI&rft.genre=article&rft.issn=2308-457X&rft.volume=v. 1&rft.issue=&rft.date=2021&rft.spage=361&rft.aulast=Winata&rft.aufirst=G.I.&rft.atitle=Adapt-and-adjust%3A+Overcoming+the+long-tail+problem+of+multilingual+speech+recognition&rft.title=Proceedings+of+the+Annual+Conference+of+the+International+Speech+Communication+Association%2C+INTERSPEECH","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"},{"id":"pmh:oai:repository.ust.hk:1783.1-115921","is_oa":false,"landing_page_url":"http://repository.ust.hk/ir/Record/1783.1-115921","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"},{"id":"doi:10.48550/arxiv.2012.01687","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2012.01687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2012.01687","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.01687","pdf_url":"https://arxiv.org/pdf/2012.01687","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3110524561.pdf","grobid_xml":"https://content.openalex.org/works/W3110524561.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W85350352","https://openalex.org/W1855892484","https://openalex.org/W1965555277","https://openalex.org/W1986174057","https://openalex.org/W2104539041","https://openalex.org/W2119191234","https://openalex.org/W2127141656","https://openalex.org/W2131427446","https://openalex.org/W2143612262","https://openalex.org/W2148143831","https://openalex.org/W2148226542","https://openalex.org/W2397987315","https://openalex.org/W2461918431","https://openalex.org/W2795935804","https://openalex.org/W2892009249","https://openalex.org/W2950304420","https://openalex.org/W2952470929","https://openalex.org/W2962893195","https://openalex.org/W2963211188","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963691377","https://openalex.org/W2963979492","https://openalex.org/W2964303773","https://openalex.org/W2964309797","https://openalex.org/W2970206392","https://openalex.org/W2970941190","https://openalex.org/W2971840980","https://openalex.org/W2972389417","https://openalex.org/W2978017171","https://openalex.org/W2995197345","https://openalex.org/W3015697211","https://openalex.org/W3023911605","https://openalex.org/W3030437843","https://openalex.org/W3035390927","https://openalex.org/W3035552357","https://openalex.org/W3037057938","https://openalex.org/W3042496707","https://openalex.org/W3042667808","https://openalex.org/W3096032230"],"related_works":["https://openalex.org/W3197845195","https://openalex.org/W3099782249","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2127141656","https://openalex.org/W2743818757","https://openalex.org/W2964138625","https://openalex.org/W2948552313","https://openalex.org/W3153675281","https://openalex.org/W2586290575","https://openalex.org/W3015944346","https://openalex.org/W2809695831","https://openalex.org/W2891616026","https://openalex.org/W2980300246","https://openalex.org/W3105242324","https://openalex.org/W3207041304","https://openalex.org/W2891816510","https://openalex.org/W2993078836","https://openalex.org/W2885706078","https://openalex.org/W2769437540"],"abstract_inverted_index":{"One":[0],"crucial":[1],"challenge":[2],"of":[3,27,33,81,89,119],"real-world":[4],"multilingual":[5,56,73],"speech":[6,57],"recognition":[7],"is":[8],"the":[9,39,63,79,102,111,117,120,128],"long-tailed":[10],"distribution":[11],"problem,":[12,41],"where":[13],"some":[14],"resource-rich":[15],"languages":[16,29],"like":[17],"English":[18],"have":[19,30],"abundant":[20],"training":[21,35,114],"data,":[22],"but":[23],"a":[24,49,71],"long":[25],"tail":[26],"low-resource":[28,82],"varying":[31],"amounts":[32],"limited":[34],"data.":[36],"To":[37],"overcome":[38],"long-tail":[40,64],"in":[42,110],"this":[43],"paper,":[44],"we":[45],"propose":[46],"Adapt-and-Adjust":[47],"(A2),":[48],"transformer-based":[50],"multi-task":[51],"learning":[52],"framework":[53,61],"for":[54],"end-to-end":[55],"recognition.":[58],"The":[59],"A2":[60,133],"overcomes":[62],"problem":[65],"via":[66],"three":[67],"techniques:":[68],"(1)":[69],"exploiting":[70],"pretrained":[72],"language":[74],"model":[75],"(mBERT)":[76],"to":[77],"improve":[78],"performance":[80],"languages;":[83],"(2)":[84],"proposing":[85],"dual":[86],"adapters":[87],"consisting":[88],"both":[90],"language-specific":[91],"and":[92,99],"language-agnostic":[93],"adaptation":[94],"with":[95],"minimal":[96],"additional":[97],"parameters;":[98],"(3)":[100],"overcoming":[101],"class":[103,108],"imbalance,":[104],"either":[105],"by":[106],"imposing":[107],"priors":[109],"loss":[112],"during":[113,123],"or":[115],"adjusting":[116],"logits":[118],"softmax":[121],"output":[122],"inference.":[124],"Extensive":[125],"experiments":[126],"on":[127],"CommonVoice":[129],"corpus":[130],"show":[131],"that":[132],"significantly":[134],"outperforms":[135],"conventional":[136],"approaches.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":9}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
