{"id":"https://openalex.org/W4406461738","doi":"https://doi.org/10.1109/slt61566.2024.10832268","title":"Language Bias in Self-Supervised Learning For Automatic Speech Recognition","display_name":"Language Bias in Self-Supervised Learning For Automatic Speech Recognition","publication_year":2024,"publication_date":"2024-12-02","ids":{"openalex":"https://openalex.org/W4406461738","doi":"https://doi.org/10.1109/slt61566.2024.10832268"},"language":"en","primary_location":{"id":"doi:10.1109/slt61566.2024.10832268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/files/472968265/LanguageBiasInSelfSupervisedLearning_accepted_vers_workshop_contrib_.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115905183","display_name":"Edward Storey","orcid":null},"institutions":[{"id":"https://openalex.org/I205274468","display_name":"Trinity College Dublin","ror":"https://ror.org/02tyrky19","country_code":"IE","type":"education","lineage":["https://openalex.org/I205274468"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Edward Storey","raw_affiliation_strings":["Trinity College,Sigmedia Lab, School of Engineering,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Trinity College,Sigmedia Lab, School of Engineering,Dublin,Ireland","institution_ids":["https://openalex.org/I205274468"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042231269","display_name":"Naomi Harte","orcid":"https://orcid.org/0000-0002-9274-209X"},"institutions":[{"id":"https://openalex.org/I205274468","display_name":"Trinity College Dublin","ror":"https://ror.org/02tyrky19","country_code":"IE","type":"education","lineage":["https://openalex.org/I205274468"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Naomi Harte","raw_affiliation_strings":["Trinity College,Sigmedia Lab, School of Engineering,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Trinity College,Sigmedia Lab, School of Engineering,Dublin,Ireland","institution_ids":["https://openalex.org/I205274468"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102911387","display_name":"Peter Bell","orcid":"https://orcid.org/0000-0002-9597-9615"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Bell","raw_affiliation_strings":["The University of Edinburgh,Centre for Speech Technology Research, School of Informatics,UK"],"affiliations":[{"raw_affiliation_string":"The University of Edinburgh,Centre for Speech Technology Research, School of Informatics,UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5115905183"],"corresponding_institution_ids":["https://openalex.org/I205274468"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23739956,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"37","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849167585372925},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6867337226867676},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.496175616979599},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46251407265663147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849167585372925},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6867337226867676},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.496175616979599},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46251407265663147}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/slt61566.2024.10832268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/cd459bf0-d59a-4630-81cd-73288eb6c3fb","is_oa":true,"landing_page_url":"https://hdl.handle.net/20.500.11820/cd459bf0-d59a-4630-81cd-73288eb6c3fb","pdf_url":"https://www.research.ed.ac.uk/files/472968265/LanguageBiasInSelfSupervisedLearning_accepted_vers_workshop_contrib_.pdf","source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Storey, E, Harte, N & Bell, P 2025, Language bias in self-supervised learning for automatic speech recognition. in Proceedings of the 2024 IEEE Spoken Language Technology Workshop. Proceedings of the IEEE Spoken Language Technology Workshop, Institute of Electrical and Electronics Engineers, Singapore, pp. 37-42, IEEE Spoken Language Technology Workshop 2024 , Macau, China, 2/12/24. https://doi.org/10.1109/SLT61566.2024.10832268","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/cd459bf0-d59a-4630-81cd-73288eb6c3fb","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/cd459bf0-d59a-4630-81cd-73288eb6c3fb","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Storey, E, Harte, N & Bell, P 2025, Language bias in self-supervised learning for automatic speech recognition. in Proceedings of the 2024 IEEE Spoken Language Technology Workshop. Proceedings of the IEEE Spoken Language Technology Workshop, Institute of Electrical and Electronics Engineers, Singapore, pp. 37-42, IEEE Spoken Language Technology Workshop 2024 , Macau, China, 2/12/24. https://doi.org/10.1109/SLT61566.2024.10832268","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/cd459bf0-d59a-4630-81cd-73288eb6c3fb","is_oa":true,"landing_page_url":"https://hdl.handle.net/20.500.11820/cd459bf0-d59a-4630-81cd-73288eb6c3fb","pdf_url":"https://www.research.ed.ac.uk/files/472968265/LanguageBiasInSelfSupervisedLearning_accepted_vers_workshop_contrib_.pdf","source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Storey, E, Harte, N & Bell, P 2025, Language bias in self-supervised learning for automatic speech recognition. in Proceedings of the 2024 IEEE Spoken Language Technology Workshop. Proceedings of the IEEE Spoken Language Technology Workshop, Institute of Electrical and Electronics Engineers, Singapore, pp. 37-42, IEEE Spoken Language Technology Workshop 2024 , Macau, China, 2/12/24. https://doi.org/10.1109/SLT61566.2024.10832268","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2322257346","display_name":null,"funder_award_id":"18/CRT/622","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G313089707","display_name":null,"funder_award_id":"13/RC/2106 P2","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G34635314","display_name":null,"funder_award_id":"8/CRT/6224","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G40717054","display_name":null,"funder_award_id":"EP/T024976/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4915346836","display_name":null,"funder_award_id":"18/CRT/6","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G6334448832","display_name":null,"funder_award_id":"13/RC/2106_P2","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G8019802486","display_name":null,"funder_award_id":"18/CRT/6224","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4406461738.pdf"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W282906231","https://openalex.org/W1494198834","https://openalex.org/W2025198378","https://openalex.org/W2964309797","https://openalex.org/W2995181338","https://openalex.org/W3012624518","https://openalex.org/W3036601975","https://openalex.org/W3095410713","https://openalex.org/W3119308075","https://openalex.org/W3168867926","https://openalex.org/W3198429080","https://openalex.org/W3206559778","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3213029956","https://openalex.org/W4221152838","https://openalex.org/W4255733975","https://openalex.org/W4287121455","https://openalex.org/W4297841287","https://openalex.org/W4308242807","https://openalex.org/W4319862635","https://openalex.org/W4372348492","https://openalex.org/W4375869065","https://openalex.org/W4385822397","https://openalex.org/W4385822686","https://openalex.org/W4386898608","https://openalex.org/W4388692690","https://openalex.org/W6638523607","https://openalex.org/W6751979845","https://openalex.org/W6780218876","https://openalex.org/W6796551075","https://openalex.org/W6796581206","https://openalex.org/W6846804489","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,7],"(SSL)":[2],"is":[3],"used":[4],"in":[5,73,79],"deep":[6],"to":[8,35,71,98,120,144],"train":[9,36],"on":[10,37,111,133],"large":[11,23],"datasets":[12],"without":[13],"the":[14,20,49,52,93,106,137,140,145],"need":[15],"for":[16,55],"expensive":[17],"labelling":[18],"of":[19,51,62,108,114],"data.":[21,147],"Recently,":[22],"Automatic":[24],"Speech":[25],"Recognition":[26],"(ASR)":[27],"models":[28],"such":[29],"as":[30],"XLS-R":[31,56,103,125],"have":[32,68],"utilised":[33],"SSL":[34,67,81],"over":[38],"one":[39],"hundred":[40],"different":[41,115],"languages":[42,138],"simultaneously.":[43],"However,":[44],"deeper":[45],"investigation":[46],"shows":[47],"that":[48,122],"bulk":[50],"training":[53],"data":[54,142],"comes":[57],"from":[58,136],"a":[59,112],"small":[60],"number":[61],"languages.":[63,116],"Biases":[64],"learned":[65,135],"through":[66],"been":[69,85],"shown":[70],"exist":[72],"multiple":[74],"domains,":[75],"but":[76],"language":[77],"bias":[78],"multilingual":[80],"ASR":[82],"has":[83],"not":[84],"thoroughly":[86],"examined.":[87],"In":[88],"this":[89],"paper,":[90],"we":[91],"utilise":[92],"Lottery":[94],"Ticket":[95],"Hypothesis":[96],"(LTH)":[97],"identify":[99],"language-specific":[100],"subnetworks":[101,110],"within":[102],"and":[104,130],"test":[105],"performance":[107],"these":[109],"variety":[113],"We":[117],"are":[118],"able":[119],"show":[121],"when":[123],"fine-tuning,":[124],"bypasses":[126],"traditional":[127],"linguistic":[128],"knowledge":[129],"builds":[131],"only":[132],"weights":[134],"with":[139],"largest":[141],"contribution":[143],"pretraining":[146]},"counts_by_year":[],"updated_date":"2026-03-21T08:13:44.787528","created_date":"2025-10-10T00:00:00"}
