{"id":"https://openalex.org/W4408353174","doi":"https://doi.org/10.1109/icassp49660.2025.10888110","title":"XLSR-Transducer: Streaming ASR for Self-Supervised Pretrained Models","display_name":"XLSR-Transducer: Streaming ASR for Self-Supervised Pretrained Models","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353174","doi":"https://doi.org/10.1109/icassp49660.2025.10888110"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/bitstreams/4687b11c-a5ba-458d-8834-c8f07f2e912c/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107893868","display_name":"Shashi Kumar","orcid":"https://orcid.org/0009-0008-8561-1068"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Shashi Kumar","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084521938","display_name":"Srikanth Madikeri","orcid":"https://orcid.org/0000-0002-4361-784X"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Srikanth Madikeri","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078502662","display_name":"Juan Zuluaga-G\u00f3mez","orcid":"https://orcid.org/0000-0002-6947-2706"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Juan Zuluaga-Gomez","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075765823","display_name":"Esa\u00fa Villatoro-Tello","orcid":"https://orcid.org/0000-0002-1322-0358"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Esa\u00fa Villatoro-Tello","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109819284","display_name":"Iuliia Thorbecke","orcid":null},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Iuliia Thorbecke","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076409146","display_name":"Petr Motl\u00ed\u010dek","orcid":"https://orcid.org/0000-0001-6467-1119"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Petr Motlicek","raw_affiliation_strings":["Idiap Research Institute,Switzerland"],"affiliations":[{"raw_affiliation_string":"Idiap Research Institute,Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055592276","display_name":"K E Manjunath","orcid":null},"institutions":[{"id":"https://openalex.org/I2799617311","display_name":"Lakeshore Hospital","ror":"https://ror.org/01dm18990","country_code":"IN","type":"healthcare","lineage":["https://openalex.org/I2799617311"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Manjunath K E","raw_affiliation_strings":["Uniphore,India"],"affiliations":[{"raw_affiliation_string":"Uniphore,India","institution_ids":["https://openalex.org/I2799617311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083410051","display_name":"Aravind Ganapathiraju","orcid":null},"institutions":[{"id":"https://openalex.org/I2799617311","display_name":"Lakeshore Hospital","ror":"https://ror.org/01dm18990","country_code":"IN","type":"healthcare","lineage":["https://openalex.org/I2799617311"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aravind Ganapathiraju","raw_affiliation_strings":["Uniphore,India"],"affiliations":[{"raw_affiliation_string":"Uniphore,India","institution_ids":["https://openalex.org/I2799617311"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5107893868"],"corresponding_institution_ids":["https://openalex.org/I7495430"],"apc_list":null,"apc_paid":null,"fwci":5.5188,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.95445024,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9577000141143799,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9577000141143799,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7469704747200012},{"id":"https://openalex.org/keywords/transducer","display_name":"Transducer","score":0.7385458946228027},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5111079812049866},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38796475529670715},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.25777071714401245},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.06495696306228638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7469704747200012},{"id":"https://openalex.org/C56318395","wikidata":"https://www.wikidata.org/wiki/Q215928","display_name":"Transducer","level":2,"score":0.7385458946228027},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5111079812049866},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38796475529670715},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.25777071714401245},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.06495696306228638}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/250009","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/250009","pdf_url":"https://infoscience.epfl.ch/bitstreams/4687b11c-a5ba-458d-8834-c8f07f2e912c/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/250009","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/250009","pdf_url":"https://infoscience.epfl.ch/bitstreams/4687b11c-a5ba-458d-8834-c8f07f2e912c/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408353174.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W108866686","https://openalex.org/W1494198834","https://openalex.org/W2064675550","https://openalex.org/W2125336414","https://openalex.org/W2144499799","https://openalex.org/W2766219058","https://openalex.org/W2903601559","https://openalex.org/W2933138175","https://openalex.org/W2972981541","https://openalex.org/W3015237657","https://openalex.org/W3015686596","https://openalex.org/W3015927303","https://openalex.org/W3016010032","https://openalex.org/W3097777922","https://openalex.org/W3160766462","https://openalex.org/W3198429080","https://openalex.org/W3198484663","https://openalex.org/W3205644108","https://openalex.org/W3213618310","https://openalex.org/W4226033575","https://openalex.org/W4226278833","https://openalex.org/W4283700324","https://openalex.org/W4375869390","https://openalex.org/W4385245566","https://openalex.org/W4385822820","https://openalex.org/W4388017359","https://openalex.org/W4391021542","https://openalex.org/W4392902568","https://openalex.org/W4401042914","https://openalex.org/W4404782769","https://openalex.org/W6631190155","https://openalex.org/W6769806307","https://openalex.org/W6771467084","https://openalex.org/W6781533629","https://openalex.org/W6810259195","https://openalex.org/W6810673746","https://openalex.org/W6847363464","https://openalex.org/W6852909395","https://openalex.org/W6857062747","https://openalex.org/W6857690716"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2012283803","https://openalex.org/W4384820447","https://openalex.org/W2072454424","https://openalex.org/W2117438306","https://openalex.org/W2185942010","https://openalex.org/W2260725127","https://openalex.org/W2004297762"],"abstract_inverted_index":{"Self-supervised":[0],"pretrained":[1,21],"models":[2,22],"exhibit":[3],"competitive":[4],"performance":[5],"in":[6,51,92,136],"automatic":[7],"speech":[8],"recognition":[9],"(ASR)":[10],"on":[11,56,106],"finetuning,":[12],"even":[13],"with":[14,33,117],"limited":[15],"in-domain":[16],"supervised":[17],"data.":[18],"However,":[19],"popular":[20],"are":[23,31],"not":[24],"suitable":[25],"for":[26],"streaming":[27,84],"ASR":[28],"because":[29],"they":[30],"trained":[32,79],"full":[34],"attention":[35,89,121],"context.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,86,123],"introduce":[41],"XLSR-Transducer,":[42],"where":[43],"the":[44,57,62,93,100,118,125],"XLSR-53":[45,101],"model":[46,78],"is":[47],"used":[48],"as":[49],"encoder":[50],"transducer":[52,77],"setup.":[53],"Our":[54],"experiments":[55],"AMI":[58,107],"dataset":[59],"reveal":[60],"that":[61],"XLSR-Transducer":[63,105],"achieves":[64],"4%":[65],"absolute":[66],"WER":[67],"improvement":[68,135],"over":[69,74],"Whisper":[70],"large-v2":[71],"and":[72,108],"8%":[73],"a":[75,132],"Zipformer":[76],"from":[80,111],"scratch.":[81],"To":[82],"enable":[83],"capabilities,":[85],"investigate":[87],"different":[88],"masking":[90],"patterns":[91],"self-attention":[94],"computation":[95],"of":[96,120],"transformer":[97],"layers":[98],"within":[99],"model.":[102],"We":[103],"validate":[104],"5":[109],"languages":[110],"CommonVoice":[112],"under":[113],"low-resource":[114],"scenarios.":[115],"Finally,":[116],"introduction":[119],"sinks,":[122],"reduce":[124],"left":[126],"context":[127],"by":[128],"half":[129],"while":[130],"achieving":[131],"relative":[133],"12%":[134],"WER.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
