{"id":"https://openalex.org/W4406461575","doi":"https://doi.org/10.1109/slt61566.2024.10832215","title":"Resource-Efficient Adaptation of Speech Foundation Models for Multi-Speaker ASR","display_name":"Resource-Efficient Adaptation of Speech Foundation Models for Multi-Speaker ASR","publication_year":2024,"publication_date":"2024-12-02","ids":{"openalex":"https://openalex.org/W4406461575","doi":"https://doi.org/10.1109/slt61566.2024.10832215"},"language":"en","primary_location":{"id":"doi:10.1109/slt61566.2024.10832215","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100372965","display_name":"Weiqing Wang","orcid":"https://orcid.org/0000-0002-9578-819X"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Weiqing Wang","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073181369","display_name":"Kunal Dhawan","orcid":"https://orcid.org/0000-0002-5276-2475"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kunal Dhawan","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000174553","display_name":"Tae\u2010Jin Park","orcid":"https://orcid.org/0000-0001-9057-9201"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taejin Park","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069691915","display_name":"Krishna C. Puvvada","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Krishna C. Puvvada","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022587932","display_name":"Ivan Medennikov","orcid":"https://orcid.org/0000-0001-5381-3433"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ivan Medennikov","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027823948","display_name":"Somshubra Majumdar","orcid":"https://orcid.org/0000-0001-5635-4893"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Somshubra Majumdar","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411301","display_name":"He Huang","orcid":"https://orcid.org/0000-0002-9217-4977"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"He Huang","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040747392","display_name":"Jagadeesh Balam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jagadeesh Balam","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032957280","display_name":"Boris Ginsburg","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boris Ginsburg","raw_affiliation_strings":["NVIDIA,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara,USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100372965"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":0.3445,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.70216338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1224","last_page":"1231"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7908196449279785},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7012909054756165},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6936118006706238},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.6156485676765442},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.43461549282073975},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4159315228462219},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.0606326162815094},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.05181005597114563}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7908196449279785},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7012909054756165},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6936118006706238},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.6156485676765442},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.43461549282073975},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4159315228462219},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0606326162815094},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.05181005597114563},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt61566.2024.10832215","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W123007118","https://openalex.org/W1556857061","https://openalex.org/W1975884129","https://openalex.org/W2075662881","https://openalex.org/W2179562937","https://openalex.org/W2219249508","https://openalex.org/W2405472765","https://openalex.org/W2622566932","https://openalex.org/W2900212944","https://openalex.org/W2902864383","https://openalex.org/W2951130829","https://openalex.org/W2963227667","https://openalex.org/W2963250244","https://openalex.org/W2963477857","https://openalex.org/W2963773971","https://openalex.org/W2963843276","https://openalex.org/W2972712416","https://openalex.org/W2973127116","https://openalex.org/W2974231335","https://openalex.org/W3004750774","https://openalex.org/W3008357631","https://openalex.org/W3020336359","https://openalex.org/W3024400986","https://openalex.org/W3094821064","https://openalex.org/W3095212884","https://openalex.org/W3097777922","https://openalex.org/W3204618061","https://openalex.org/W3208887030","https://openalex.org/W4226491018","https://openalex.org/W4286895682","https://openalex.org/W4323066695","https://openalex.org/W4389315128","https://openalex.org/W4389518416","https://openalex.org/W4391021542","https://openalex.org/W4392902855","https://openalex.org/W4392903223","https://openalex.org/W4402111558","https://openalex.org/W4402112192","https://openalex.org/W6603931906","https://openalex.org/W6678809451","https://openalex.org/W6685848937","https://openalex.org/W6688816777","https://openalex.org/W6755552547","https://openalex.org/W6759579507","https://openalex.org/W6767671539","https://openalex.org/W6847363464","https://openalex.org/W6850218400"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2368768466","https://openalex.org/W2757081366","https://openalex.org/W3197877226"],"abstract_inverted_index":{"Speech":[0],"foundation":[1,46,63,136],"models":[2,30,47,137],"have":[3],"achieved":[4],"state-of-the-art":[5],"(SoTA)":[6],"performance":[7],"across":[8],"various":[9],"tasks,":[10],"such":[11],"as":[12],"automatic":[13],"speech":[14,45,53,62,135],"recognition":[15],"(ASR)":[16],"in":[17],"hundreds":[18],"of":[19,91,103,116],"languages.":[20],"However,":[21],"multi-speaker":[22,52,67,139],"ASR":[23,68,140],"remains":[24],"a":[25,61],"challenging":[26],"task":[27,69],"for":[28,65,138],"these":[29],"due":[31],"to":[32,43,48,99],"data":[33,83],"scarcity":[34],"and":[35,50,106],"sparsity.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,59],"present":[41],"approaches":[42],"enable":[44],"process":[49],"understand":[51],"with":[54,142],"limited":[55],"training":[56],"data.":[57,73,145],"Specifically,":[58],"adapt":[60],"model":[64,77,109],"the":[66,75,88,101,114],"using":[70],"only":[71],"telephonic":[72],"Remarkably,":[74],"adapted":[76],"also":[78],"performs":[79],"well":[80],"on":[81,108],"meeting":[82],"without":[84],"any":[85],"fine-tuning,":[86],"demonstrating":[87],"generalization":[89],"ability":[90],"our":[92,117],"approach.":[93],"We":[94],"conduct":[95],"several":[96],"ablation":[97],"studies":[98],"analyze":[100],"impact":[102],"different":[104],"parameters":[105,123],"strategies":[107],"performance.":[110],"Our":[111],"findings":[112],"highlight":[113],"effectiveness":[115],"methods.":[118],"Results":[119],"show":[120],"that":[121],"less":[122],"give":[124],"better":[125],"overall":[126],"cpWER,":[127],"which,":[128],"although":[129],"counterintuitive,":[130],"provides":[131],"insights":[132],"into":[133],"adapting":[134],"tasks":[141],"minimal":[143],"annotated":[144]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
