{"id":"https://openalex.org/W3213029956","doi":"https://doi.org/10.21437/interspeech.2022-143","title":"XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale","display_name":"XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W3213029956","doi":"https://doi.org/10.21437/interspeech.2022-143","mag":"3213029956"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-143","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-143","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083494883","display_name":"Arun Babu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Arun Babu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087491225","display_name":"Changhan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Changhan Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050838145","display_name":"Kushal Lakhotia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kushal Lakhotia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102647000","display_name":"Qiantong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiantong Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075834790","display_name":"Naman Goyal","orcid":"https://orcid.org/0000-0002-7565-4303"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naman Goyal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091502081","display_name":"Kritika Singh","orcid":"https://orcid.org/0000-0002-6637-1571"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kritika Singh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025097230","display_name":"Patrick von Platen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patrick von Platen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051165898","display_name":"Yatharth Saraf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yatharth Saraf","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058915697","display_name":"Juan Pino","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juan Pino","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031979802","display_name":"Alexei Baevski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexei Baevski","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068394403","display_name":"Alexis Conneau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexis Conneau","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5083212922","display_name":"Michael Auli","orcid":"https://orcid.org/0000-0001-5974-4459"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Auli","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5083494883"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":47.2455,"has_fulltext":false,"cited_by_count":468,"citation_normalized_percentile":{"value":0.99941697,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2278","last_page":"2282"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7579368352890015},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.640841543674469},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6111950278282166},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6052662134170532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5467430949211121},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4219309389591217},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3242166042327881},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.03914099931716919}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7579368352890015},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.640841543674469},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6111950278282166},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6052662134170532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5467430949211121},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4219309389591217},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3242166042327881},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.03914099931716919},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-143","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-143","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2101105183","https://openalex.org/W2124509324","https://openalex.org/W2127141656","https://openalex.org/W2292087804","https://openalex.org/W2331143823","https://openalex.org/W2338908902","https://openalex.org/W2547875792","https://openalex.org/W2671812860","https://openalex.org/W2726515241","https://openalex.org/W2730658205","https://openalex.org/W2811079561","https://openalex.org/W2842511635","https://openalex.org/W2891555348","https://openalex.org/W2896457183","https://openalex.org/W2898630520","https://openalex.org/W2914120296","https://openalex.org/W2933138175","https://openalex.org/W2950170869","https://openalex.org/W2958953787","https://openalex.org/W2962784628","https://openalex.org/W2962942158","https://openalex.org/W2963027641","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2964121744","https://openalex.org/W2970049541","https://openalex.org/W2970119519","https://openalex.org/W2973049979","https://openalex.org/W2975381464","https://openalex.org/W2979476256","https://openalex.org/W2989539713","https://openalex.org/W2995181338","https://openalex.org/W2996159613","https://openalex.org/W2996383576","https://openalex.org/W3002741552","https://openalex.org/W3005680577","https://openalex.org/W3015213852","https://openalex.org/W3016181583","https://openalex.org/W3030437843","https://openalex.org/W3032816972","https://openalex.org/W3034978746","https://openalex.org/W3035390927","https://openalex.org/W3035524453","https://openalex.org/W3035579820","https://openalex.org/W3036601975","https://openalex.org/W3054645415","https://openalex.org/W3082274269","https://openalex.org/W3093517588","https://openalex.org/W3093579165","https://openalex.org/W3095410713","https://openalex.org/W3098903812","https://openalex.org/W3099782249","https://openalex.org/W3102342027","https://openalex.org/W3107826490","https://openalex.org/W3119308075","https://openalex.org/W3119866685","https://openalex.org/W3139878283","https://openalex.org/W3144173820","https://openalex.org/W3156643189","https://openalex.org/W3159134453","https://openalex.org/W3159481202","https://openalex.org/W3160525311","https://openalex.org/W3160799772","https://openalex.org/W3167533889","https://openalex.org/W3169483174","https://openalex.org/W3173767661","https://openalex.org/W3174724858","https://openalex.org/W3185293939","https://openalex.org/W3193521535","https://openalex.org/W3197580070","https://openalex.org/W3198429080","https://openalex.org/W3198771897","https://openalex.org/W3204696009","https://openalex.org/W3214173179","https://openalex.org/W4210463634","https://openalex.org/W4226033575","https://openalex.org/W4287213456","https://openalex.org/W4287391717","https://openalex.org/W4288089799","https://openalex.org/W4292779060","https://openalex.org/W4297808394","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2795079307","https://openalex.org/W2793058541","https://openalex.org/W3118638206","https://openalex.org/W4324271173","https://openalex.org/W1983629434","https://openalex.org/W1967645776","https://openalex.org/W2352227742","https://openalex.org/W1480923359","https://openalex.org/W2062484128"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"XLS-R,":[3],"a":[4,26,52,119,152],"large-scale":[5],"model":[6,134],"for":[7,167],"cross-lingual":[8,136],"speech":[9,32,68,91,148,164],"representation":[10],"learning":[11],"based":[12],"on":[13,23,101,115,125],"wav2vec":[14],"2.0.We":[15],"train":[16],"models":[17],"with":[18,132],"up":[19],"to":[20,162],"2B":[21],"parameters":[22],"nearly":[24],"half":[25],"million":[27],"hours":[28],"of":[29,39,55,76,82,122,171],"publicly":[30],"available":[31,177],"audio":[33],"in":[34],"128":[35],"languages,":[36,61,151],"an":[37,80],"order":[38],"magnitude":[40],"more":[41,169],"public":[42],"data":[43,58],"than":[44],"the":[45,66,73,77,96,123,172],"largest":[46],"known":[47,98],"prior":[48,99],"work.Our":[49],"evaluation":[50],"covers":[51],"wide":[53],"range":[54],"tasks,":[56],"domains,":[57],"regimes":[59],"and":[60,64,174],"both":[62],"high":[63],"low-resource.On":[65],"CoVoST-2":[67],"translation":[69,87],"benchmark,":[70],"we":[71,129],"improve":[72,163],"previous":[74],"state":[75,121],"art":[78,124],"by":[79,112],"average":[81],"7.4":[83],"BLEU":[84],"over":[85,95],"21":[86],"directions":[88],"into":[89,149],"English.For":[90],"recognition,":[92],"XLS-R":[93,159],"improves":[94],"best":[97],"work":[100],"BABEL,":[102],"MLS,":[103],"CommonVoice":[104],"as":[105,107,140,142],"well":[106,141],"VoxPopuli,":[108],"lowering":[109],"error":[110],"rates":[111],"14-34%":[113],"relative":[114],"average.XLS-R":[116],"also":[117],"sets":[118],"new":[120],"VoxLin-gua107":[126],"language":[127],"identification.Moreover,":[128],"show":[130],"that":[131],"sufficient":[133],"size,":[135],"pretraining":[137,144],"can":[138,160],"perform":[139],"English-only":[143],"when":[145],"translating":[146],"English":[147],"other":[150],"setting":[153],"which":[154],"favors":[155],"monolingual":[156],"pretraining.We":[157],"hope":[158],"help":[161],"processing":[165],"tasks":[166],"many":[168],"languages":[170],"world.Models":[173],"code":[175],"are":[176],"at":[178],"www.github.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":16},{"year":2025,"cited_by_count":156},{"year":2024,"cited_by_count":115},{"year":2023,"cited_by_count":136},{"year":2022,"cited_by_count":45}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
