{"id":"https://openalex.org/W4225274946","doi":"https://doi.org/10.1109/icassp43922.2022.9746223","title":"Efficient Adapter Transfer of Self-Supervised Speech Models for Automatic Speech Recognition","display_name":"Efficient Adapter Transfer of Self-Supervised Speech Models for Automatic Speech Recognition","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225274946","doi":"https://doi.org/10.1109/icassp43922.2022.9746223"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746223","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051254142","display_name":"Bethan Thomas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210160618","display_name":"Huawei Technologies (United Kingdom)","ror":"https://ror.org/056gzgs71","country_code":"GB","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210160618"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Bethan Thomas","raw_affiliation_strings":["Huawei R&#x0026;D UK"],"affiliations":[{"raw_affiliation_string":"Huawei R&#x0026;D UK","institution_ids":["https://openalex.org/I4210160618"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056415825","display_name":"Samuel Kessler","orcid":"https://orcid.org/0009-0007-4940-8575"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Samuel Kessler","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034921884","display_name":"Salah Karout","orcid":null},"institutions":[{"id":"https://openalex.org/I4210160618","display_name":"Huawei Technologies (United Kingdom)","ror":"https://ror.org/056gzgs71","country_code":"GB","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210160618"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Salah Karout","raw_affiliation_strings":["Huawei R&#x0026;D UK"],"affiliations":[{"raw_affiliation_string":"Huawei R&#x0026;D UK","institution_ids":["https://openalex.org/I4210160618"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051254142"],"corresponding_institution_ids":["https://openalex.org/I4210160618"],"apc_list":null,"apc_paid":null,"fwci":5.5089,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.96794872,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.838942289352417},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6583771705627441},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.626132607460022},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.6149684190750122},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.599203884601593},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5890470743179321},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5336635112762451},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4907017946243286},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48484647274017334},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.43967005610466003},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4272216856479645},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.42277273535728455},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.41576915979385376},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4135132431983948},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37559282779693604},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1104910671710968},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10687130689620972}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.838942289352417},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6583771705627441},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.626132607460022},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.6149684190750122},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.599203884601593},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5890470743179321},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5336635112762451},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4907017946243286},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48484647274017334},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.43967005610466003},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4272216856479645},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.42277273535728455},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.41576915979385376},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4135132431983948},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37559282779693604},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1104910671710968},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10687130689620972},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746223","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2842511635","https://openalex.org/W2896457183","https://openalex.org/W2933138175","https://openalex.org/W2963211188","https://openalex.org/W2964303773","https://openalex.org/W2970925270","https://openalex.org/W2971840980","https://openalex.org/W2972943112","https://openalex.org/W2973049979","https://openalex.org/W2995181338","https://openalex.org/W3030437843","https://openalex.org/W3035390927","https://openalex.org/W3036601975","https://openalex.org/W3101498587","https://openalex.org/W3160525311","https://openalex.org/W3172698324","https://openalex.org/W3186596101","https://openalex.org/W3197845195","https://openalex.org/W3209059054","https://openalex.org/W4297808394","https://openalex.org/W6738045163","https://openalex.org/W6755207826","https://openalex.org/W6759579507","https://openalex.org/W6769263558","https://openalex.org/W6771467084","https://openalex.org/W6780218876","https://openalex.org/W6787191599","https://openalex.org/W6799245484"],"related_works":["https://openalex.org/W642007152","https://openalex.org/W2401827384","https://openalex.org/W2355290951","https://openalex.org/W2069501481","https://openalex.org/W2103239478","https://openalex.org/W2552102772","https://openalex.org/W1510046822","https://openalex.org/W2052688117","https://openalex.org/W4294771049","https://openalex.org/W1523214805"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,9],"(SSL)":[2],"is":[3],"a":[4,39,46],"powerful":[5],"tool":[6],"that":[7,145,167],"allows":[8],"of":[10,42,60,101,111,130,141,154],"underlying":[11],"representations":[12],"from":[13],"unlabeled":[14],"data.":[15],"Transformer":[16],"based":[17],"models":[18,35,83],"such":[19,49],"as":[20,50],"wav2vec":[21,95],"2.0":[22,96],"and":[23,108,175],"HuBERT":[24],"are":[25,36,67,72],"leading":[26],"the":[27,30,58,61,99,112,150,155,165],"field":[28],"in":[29,75],"speech":[31],"domain.":[32],"Generally":[33],"these":[34],"fine-tuned":[37],"on":[38],"small":[40,68],"amount":[41],"labeled":[43],"data":[44],"for":[45,63,104],"downstream":[47,105],"task":[48,133],"Automatic":[51],"Speech":[52],"Recognition":[53],"(ASR).":[54],"This":[55],"involves":[56],"re-training":[57],"majority":[59],"model":[62,113],"each":[64],"task.":[65],"Adapters":[66],"lightweight":[69],"modules":[70],"which":[71],"commonly":[73],"used":[74],"Natural":[76],"Language":[77],"Processing":[78],"(NLP)":[79],"to":[80,84,94,97,114,135,161],"adapt":[81],"pre-trained":[82,156,169],"new":[85],"tasks.":[86],"In":[87],"this":[88],"paper":[89],"we":[90,121],"propose":[91],"applying":[92,146],"adapters":[93,120,147],"reduce":[98],"number":[100],"parameters":[102,131],"required":[103],"ASR":[106,124],"tasks,":[107],"increase":[109],"scalability":[110],"multiple":[115],"tasks":[116],"or":[117],"languages.":[118],"Using":[119],"can":[122],"perform":[123],"while":[125],"training":[126],"fewer":[127],"than":[128],"10%":[129],"per":[132],"compared":[134],"full":[136,162],"fine-tuning":[137],"with":[138],"little":[139],"degradation":[140],"performance.":[142],"Ablations":[143],"show":[144],"into":[148],"just":[149],"top":[151],"few":[152],"layers":[153,170],"network":[157],"gives":[158],"similar":[159],"performance":[160],"transfer,":[163],"supporting":[164],"theory":[166],"higher":[168],"encode":[171],"more":[172],"phonemic":[173],"information,":[174],"further":[176],"optimizing":[177],"efficiency.":[178]},"counts_by_year":[{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
