{"id":"https://openalex.org/W4372347050","doi":"https://doi.org/10.1109/icassp49357.2023.10096983","title":"Federated Self-Learning with Weak Supervision for Speech Recognition","display_name":"Federated Self-Learning with Weak Supervision for Speech Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372347050","doi":"https://doi.org/10.1109/icassp49357.2023.10096983"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096983","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096983","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031236216","display_name":"Milind Rao","orcid":"https://orcid.org/0000-0003-2649-3205"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Milind Rao","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063725454","display_name":"Gopinath Chennupati","orcid":"https://orcid.org/0000-0002-6223-8570"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gopinath Chennupati","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003334789","display_name":"Gautam Tiwari","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gautam Tiwari","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019965945","display_name":"Anit Kumar Sahu","orcid":"https://orcid.org/0000-0002-4083-0418"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anit Kumar Sahu","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109046112","display_name":"Anirudh Raju","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirudh Raju","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110230355","display_name":"Ariya Rastrow","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ariya Rastrow","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012153296","display_name":"Jasha Droppo","orcid":"https://orcid.org/0000-0001-6097-0090"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jasha Droppo","raw_affiliation_strings":["Amazon Alexa AI,U.S.A","Amazon Alexa AI, U.S.A"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,U.S.A","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, U.S.A","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5031236216"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":1.1764,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80630742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8429666757583618},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.7539416551589966},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.5921042561531067},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5659060478210449},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.48381465673446655},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.48359960317611694},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.48277392983436584},{"id":"https://openalex.org/keywords/session","display_name":"Session (web analytics)","score":0.46983736753463745},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.46575480699539185},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4634888470172882},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4336671233177185},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3535653054714203},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3295716643333435}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8429666757583618},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.7539416551589966},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.5921042561531067},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5659060478210449},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.48381465673446655},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.48359960317611694},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.48277392983436584},{"id":"https://openalex.org/C2779182362","wikidata":"https://www.wikidata.org/wiki/Q17126187","display_name":"Session (web analytics)","level":2,"score":0.46983736753463745},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.46575480699539185},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4634888470172882},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4336671233177185},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3535653054714203},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3295716643333435},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096983","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096983","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2119717200","https://openalex.org/W2131342762","https://openalex.org/W2143612262","https://openalex.org/W2560647685","https://openalex.org/W2799040448","https://openalex.org/W2936774411","https://openalex.org/W2962907457","https://openalex.org/W2963747784","https://openalex.org/W2963979492","https://openalex.org/W3038119500","https://openalex.org/W3094793624","https://openalex.org/W3095350795","https://openalex.org/W3095866088","https://openalex.org/W3097714942","https://openalex.org/W3100460087","https://openalex.org/W3160525311","https://openalex.org/W3161631007","https://openalex.org/W3209984917","https://openalex.org/W4221145109","https://openalex.org/W4225755266","https://openalex.org/W4297841396","https://openalex.org/W4297841489","https://openalex.org/W4297841802","https://openalex.org/W4318619660","https://openalex.org/W6638749077","https://openalex.org/W6728757088","https://openalex.org/W6803164887","https://openalex.org/W6810007534"],"related_works":["https://openalex.org/W4289718052","https://openalex.org/W2164121020","https://openalex.org/W2145559838","https://openalex.org/W3116498279","https://openalex.org/W4287549553","https://openalex.org/W3183027292","https://openalex.org/W2974871044","https://openalex.org/W4310285384","https://openalex.org/W2794885965","https://openalex.org/W2104218666"],"abstract_inverted_index":{"Automatic":[0],"speech":[1],"recognition":[2],"(ASR)":[3],"models":[4,35],"with":[5,69,112,147,168],"low-footprint":[6],"are":[7,118],"increasingly":[8],"being":[9],"deployed":[10],"on":[11,164,171],"edge":[12],"devices":[13],"for":[14,29,132,158],"conversational":[15,114],"agents,":[16],"which":[17],"enhances":[18],"privacy.":[19],"We":[20],"study":[21,61],"the":[22,37,62,113,128,176],"problem":[23],"of":[24,40,64,80,110,130,178],"federated":[25],"continual":[26],"incremental":[27],"learning":[28,41,146],"recurrent":[30],"neural":[31],"network-transducer":[32],"(RNN-T)":[33],"ASR":[34,56,81],"in":[36,107,120,162,175],"privacy-enhancing":[38],"scheme":[39],"on-device,":[42],"without":[43],"access":[44],"to":[45,126],"ground":[46],"truth":[47],"human":[48],"transcripts":[49],"or":[50],"machine":[51],"transcriptions":[52],"from":[53,101],"a":[54,65,70,108,121,148],"stronger":[55],"model.":[57],"In":[58],"particular,":[59],"we":[60,84,135],"performance":[63,129],"self-learning":[66,131],"based":[67],"scheme,":[68],"paired":[71],"teacher":[72],"model":[73],"updated":[74],"through":[75],"an":[76],"exponential":[77],"moving":[78],"average":[79],"models.":[82],"Further,":[83],"propose":[85],"using":[86,151],"possibly":[87],"noisy":[88],"weak-supervision":[89],"signals":[90,117,180],"such":[91,181],"as":[92,182],"feedback":[93],"scores":[94],"and":[95],"natural":[96],"language":[97],"understanding":[98],"semantics":[99],"determined":[100],"user":[102],"behavior":[103],"across":[104],"multiple":[105],"turns":[106],"session":[109],"interactions":[111],"agent.":[115],"These":[116,155],"leveraged":[119],"multitask":[122],"policy-gradient":[123],"training":[124],"approach":[125,150],"improve":[127],"ASR.":[133],"Finally,":[134],"show":[136],"how":[137],"catastrophic":[138],"forgetting":[139],"can":[140],"be":[141],"mitigated":[142],"by":[143],"combining":[144],"on-device":[145],"memory-replay":[149],"selected":[152],"historical":[153],"datasets.":[154],"innovations":[156],"allow":[157],"10%":[159],"relative":[160],"improvement":[161],"WER":[163],"new":[165],"use":[166],"cases":[167],"minimal":[169],"degradation":[170],"other":[172],"test":[173],"sets":[174],"absence":[177],"strong-supervision":[179],"ground-truth":[183],"transcriptions.":[184]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
