{"id":"https://openalex.org/W3170205569","doi":"https://doi.org/10.1109/asru51503.2021.9688028","title":"Kaizen: Continuously Improving Teacher Using Exponential Moving Average for Semi-Supervised Speech Recognition","display_name":"Kaizen: Continuously Improving Teacher Using Exponential Moving Average for Semi-Supervised Speech Recognition","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W3170205569","doi":"https://doi.org/10.1109/asru51503.2021.9688028","mag":"3170205569"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688028","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.07759","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028956985","display_name":"Vimal Manohar","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Vimal Manohar","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107896783","display_name":"T. Likhomanenko","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Tatiana Likhomanenko","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102647000","display_name":"Qiantong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Qiantong Xu","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051950818","display_name":"Wei-Ning Hsu","orcid":"https://orcid.org/0000-0001-5546-5217"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Wei-Ning Hsu","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053915453","display_name":"Ronan Collobert","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ronan Collobert","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051165898","display_name":"Yatharth Saraf","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yatharth Saraf","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069954850","display_name":"Geoffrey Zweig","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Geoffrey Zweig","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103742478","display_name":"Abdelrahman Mohamed","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Abdelrahman Mohamed","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5028956985"],"corresponding_institution_ids":["https://openalex.org/I2252078561"],"apc_list":null,"apc_paid":null,"fwci":0.12261971,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.31579931,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"518","last_page":"525"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.7086957693099976},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7051043510437012},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6890336871147156},{"id":"https://openalex.org/keywords/kaizen","display_name":"Kaizen","score":0.6480283737182617},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5522089004516602},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5292662382125854},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5050562024116516},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4899708330631256},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4751198887825012},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4334111213684082},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4288226366043091},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.42503225803375244},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15105971693992615},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10050651431083679}],"concepts":[{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.7086957693099976},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7051043510437012},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6890336871147156},{"id":"https://openalex.org/C2776600044","wikidata":"https://www.wikidata.org/wiki/Q376444","display_name":"Kaizen","level":3,"score":0.6480283737182617},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5522089004516602},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5292662382125854},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5050562024116516},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4899708330631256},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4751198887825012},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4334111213684082},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4288226366043091},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.42503225803375244},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15105971693992615},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10050651431083679},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C137335462","wikidata":"https://www.wikidata.org/wiki/Q380772","display_name":"Lean manufacturing","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/asru51503.2021.9688028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688028","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.07759","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.07759","pdf_url":"https://arxiv.org/pdf/2106.07759","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3170205569","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2106.07759","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.07759","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.07759","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.07759","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.07759","pdf_url":"https://arxiv.org/pdf/2106.07759","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8799999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3170205569.pdf","grobid_xml":"https://content.openalex.org/works/W3170205569.grobid-xml"},"referenced_works_count":64,"referenced_works":["https://openalex.org/W105703852","https://openalex.org/W1494198834","https://openalex.org/W1686810756","https://openalex.org/W1821462560","https://openalex.org/W1965555277","https://openalex.org/W1993660824","https://openalex.org/W2048060899","https://openalex.org/W2056786202","https://openalex.org/W2064675550","https://openalex.org/W2095705004","https://openalex.org/W2127141656","https://openalex.org/W2134797427","https://openalex.org/W2146502635","https://openalex.org/W2294370754","https://openalex.org/W2402146185","https://openalex.org/W2407080277","https://openalex.org/W2507699225","https://openalex.org/W2514741789","https://openalex.org/W2520160253","https://openalex.org/W2592691248","https://openalex.org/W2747135936","https://openalex.org/W2763421725","https://openalex.org/W2802248956","https://openalex.org/W2889282842","https://openalex.org/W2897098106","https://openalex.org/W2936774411","https://openalex.org/W2940180244","https://openalex.org/W2951970475","https://openalex.org/W2963250244","https://openalex.org/W2963403868","https://openalex.org/W2964121744","https://openalex.org/W2981857663","https://openalex.org/W2991213871","https://openalex.org/W2995181338","https://openalex.org/W2998532468","https://openalex.org/W3008525923","https://openalex.org/W3015522062","https://openalex.org/W3026041220","https://openalex.org/W3094647783","https://openalex.org/W3095350795","https://openalex.org/W3096338464","https://openalex.org/W3099782249","https://openalex.org/W3101821705","https://openalex.org/W3167575587","https://openalex.org/W3169320628","https://openalex.org/W3178042202","https://openalex.org/W3197223534","https://openalex.org/W3198098585","https://openalex.org/W3209059054","https://openalex.org/W4225741214","https://openalex.org/W6604352794","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6638523607","https://openalex.org/W6674330103","https://openalex.org/W6679909955","https://openalex.org/W6681435938","https://openalex.org/W6727336983","https://openalex.org/W6733814495","https://openalex.org/W6764051988","https://openalex.org/W6770506093","https://openalex.org/W6772883055","https://openalex.org/W6779326418","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2787812568","https://openalex.org/W3134415196","https://openalex.org/W3174699664","https://openalex.org/W3169165652","https://openalex.org/W2785554650","https://openalex.org/W3032083627","https://openalex.org/W3161101519","https://openalex.org/W2911629330","https://openalex.org/W1520691178","https://openalex.org/W3202081933","https://openalex.org/W3149506872","https://openalex.org/W3082916138","https://openalex.org/W2810150561","https://openalex.org/W3138768133","https://openalex.org/W2952229419","https://openalex.org/W3200737004","https://openalex.org/W640713170","https://openalex.org/W2891725580","https://openalex.org/W2515319207","https://openalex.org/W1672047425"],"abstract_inverted_index":{"In":[0],"this":[1,85],"paper,":[2],"we":[3,87],"introduce":[4],"the":[5,33,39,69,124,158,161],"Kaizen":[6,59],"framework":[7,60],"that":[8,45,166],"uses":[9,25,167],"a":[10,26,65,151],"continuously":[11],"improving":[12],"teacher":[13,27],"to":[14,51,160],"generate":[15],"pseudo-labels":[16],"for":[17,49,73,79,91],"semi-supervised":[18,74],"speech":[19],"recognition":[20],"(ASR).":[21],"The":[22,58],"proposed":[23,125],"approach":[24,72,126],"model":[28,41],"which":[29],"is":[30,47,77],"updated":[31],"as":[32,64,101,103],"exponential":[34],"moving":[35],"average":[36],"(EMA)":[37],"of":[38,68,147,154],"student":[40],"parameters.":[42],"We":[43],"demonstrate":[44,88],"it":[46],"critical":[48],"EMA":[50],"be":[52,62],"accumulated":[53],"with":[54],"full-precision":[55],"floating":[56],"point.":[57],"can":[61],"seen":[63],"continuous":[66],"version":[67],"iterative":[70],"pseudo-labeling":[71],"training.":[75],"It":[76],"applicable":[78],"different":[80],"training":[81],"criteria,":[82],"and":[83,121,150],"in":[84,118],"paper":[86],"its":[89],"effectiveness":[90],"frame-level":[92],"hybrid":[93],"hidden":[94],"Markov":[95],"model-deep":[96],"neural":[97],"network":[98],"(HMM-DNN)":[99],"systems":[100],"well":[102],"sequence-level":[104],"Connectionist":[105],"Temporal":[106],"Classification":[107],"(CTC)":[108],"based":[109],"models.":[110],"For":[111],"large":[112,152],"scale":[113],"real-world":[114],"unsupervised":[115,155],"public":[116],"videos":[117],"UK":[119],"English":[120],"Italian":[122],"languages":[123],"i)":[127],"shows":[128],"more":[129],"than":[130],"10%":[131],"relative":[132],"word":[133],"error":[134],"rate":[135],"(WER)":[136],"reduction":[137],"over":[138],"standard":[139],"teacher-student":[140],"training;":[141],"ii)":[142],"using":[143],"just":[144],"10":[145],"hours":[146],"supervised":[148,163],"data":[149,156],"amount":[153],"closes":[157],"gap":[159],"upper-bound":[162],"ASR":[164],"system":[165],"650h":[168],"or":[169],"2700h":[170],"respectively.":[171]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
