{"id":"https://openalex.org/W3203407300","doi":"https://doi.org/10.1109/icassp43922.2022.9746682","title":"WENETSPEECH: A 10000+ Hours Multi-Domain Mandarin Corpus for Speech Recognition","display_name":"WENETSPEECH: A 10000+ Hours Multi-Domain Mandarin Corpus for Speech Recognition","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3203407300","doi":"https://doi.org/10.1109/icassp43922.2022.9746682","mag":"3203407300"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746682","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100449712","display_name":"Binbin Zhang","orcid":"https://orcid.org/0000-0002-1874-1881"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]},{"id":"https://openalex.org/I99418890","display_name":"Northwestern Polytechnic University","ror":"https://ror.org/05wn69s11","country_code":"US","type":"education","lineage":["https://openalex.org/I99418890"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Binbin Zhang","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","Mobvoi Inc","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","institution_ids":["https://openalex.org/I99418890","https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100751610","display_name":"Hang Lv","orcid":"https://orcid.org/0000-0003-3761-1684"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]},{"id":"https://openalex.org/I99418890","display_name":"Northwestern Polytechnic University","ror":"https://ror.org/05wn69s11","country_code":"US","type":"education","lineage":["https://openalex.org/I99418890"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Hang Lv","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","WeNet Open Source Community","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","institution_ids":["https://openalex.org/I99418890","https://openalex.org/I17145004"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783173","display_name":"Pengcheng Guo","orcid":"https://orcid.org/0009-0001-2388-5935"},"institutions":[{"id":"https://openalex.org/I99418890","display_name":"Northwestern Polytechnic University","ror":"https://ror.org/05wn69s11","country_code":"US","type":"education","lineage":["https://openalex.org/I99418890"]},{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Pengcheng Guo","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","institution_ids":["https://openalex.org/I99418890","https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018773485","display_name":"Qijie Shao","orcid":"https://orcid.org/0009-0000-2145-4077"},"institutions":[{"id":"https://openalex.org/I99418890","display_name":"Northwestern Polytechnic University","ror":"https://ror.org/05wn69s11","country_code":"US","type":"education","lineage":["https://openalex.org/I99418890"]},{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Qijie Shao","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","institution_ids":["https://openalex.org/I99418890","https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083993732","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0002-9658-050X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chao Yang","raw_affiliation_strings":["Mobvoi Inc","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]},{"id":"https://openalex.org/I99418890","display_name":"Northwestern Polytechnic University","ror":"https://ror.org/05wn69s11","country_code":"US","type":"education","lineage":["https://openalex.org/I99418890"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science","institution_ids":["https://openalex.org/I99418890","https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696102","display_name":"Xin Xu","orcid":"https://orcid.org/0000-0003-0748-3669"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Xu","raw_affiliation_strings":["Beijing Shell Shell Technology Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Shell Shell Technology Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101791717","display_name":"Hui Bu","orcid":"https://orcid.org/0000-0002-5883-1195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hui Bu","raw_affiliation_strings":["Beijing Shell Shell Technology Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Shell Shell Technology Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100386498","display_name":"Xiaoyu Chen","orcid":"https://orcid.org/0000-0003-0426-8920"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoyu Chen","raw_affiliation_strings":["Mobvoi Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071742855","display_name":"Chenchen Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenchen Zeng","raw_affiliation_strings":["Mobvoi Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100602151","display_name":"Di Wu","orcid":"https://orcid.org/0000-0002-7788-9202"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Mobvoi Inc","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041630666","display_name":"Zhendong Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhendong Peng","raw_affiliation_strings":["Mobvoi Inc","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mobvoi Inc","institution_ids":[]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5100449712"],"corresponding_institution_ids":["https://openalex.org/I17145004","https://openalex.org/I99418890"],"apc_list":null,"apc_paid":null,"fwci":13.103,"has_fulltext":false,"cited_by_count":144,"citation_normalized_percentile":{"value":0.99258082,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"6182","last_page":"6186"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8514913320541382},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7284635305404663},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.632246196269989},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5371986031532288},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.5184009075164795},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4914144277572632},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47330227494239807},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4588523507118225},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.45102566480636597},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.41432735323905945},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.35172587633132935},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10370472073554993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8514913320541382},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7284635305404663},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.632246196269989},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5371986031532288},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.5184009075164795},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4914144277572632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47330227494239807},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4588523507118225},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.45102566480636597},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.41432735323905945},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.35172587633132935},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10370472073554993},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746682","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1828163288","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2158791054","https://openalex.org/W2160815625","https://openalex.org/W2184045248","https://openalex.org/W2193413348","https://openalex.org/W2194187530","https://openalex.org/W2327501763","https://openalex.org/W2514741789","https://openalex.org/W2519818067","https://openalex.org/W2526425061","https://openalex.org/W2739883972","https://openalex.org/W2802023636","https://openalex.org/W2888867175","https://openalex.org/W2889048668","https://openalex.org/W2892009249","https://openalex.org/W2933138175","https://openalex.org/W2936774411","https://openalex.org/W2962780374","https://openalex.org/W2963211739","https://openalex.org/W2963242190","https://openalex.org/W2963403868","https://openalex.org/W2973049979","https://openalex.org/W3036601975","https://openalex.org/W3037057938","https://openalex.org/W3042170933","https://openalex.org/W3095410713","https://openalex.org/W3097777922","https://openalex.org/W3099782249","https://openalex.org/W3128442956","https://openalex.org/W3151526698","https://openalex.org/W3160799772","https://openalex.org/W3163793923","https://openalex.org/W3165666670","https://openalex.org/W3168612151","https://openalex.org/W3169688220","https://openalex.org/W3197478142","https://openalex.org/W3197917733","https://openalex.org/W3198429080","https://openalex.org/W3198694222","https://openalex.org/W4287173589","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6631362777","https://openalex.org/W6638749077","https://openalex.org/W6687566353","https://openalex.org/W6728030952","https://openalex.org/W6739901393","https://openalex.org/W6754473786","https://openalex.org/W6780218876","https://openalex.org/W6780815891","https://openalex.org/W6795952400","https://openalex.org/W6796463219"],"related_works":["https://openalex.org/W642007152","https://openalex.org/W2401827384","https://openalex.org/W2355290951","https://openalex.org/W2069501481","https://openalex.org/W2052688117","https://openalex.org/W2552102772","https://openalex.org/W2103239478","https://openalex.org/W4294771049","https://openalex.org/W1523214805","https://openalex.org/W2168417340"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,95],"present":[4],"WenetSpeech,":[5],"a":[6,43,78,97],"multi-domain":[7],"Mandarin":[8,193],"corpus":[9,195],"consisting":[10],"of":[11,45,184],"10000+":[12],"hours":[13,18,25,30],"high-quality":[14,79,117],"labeled":[15,20],"speech,":[16,21,27],"2400+":[17],"weakly":[19],"and":[22,39,51,107,139,166,168],"about":[23],"10000":[24],"unlabeled":[26],"with":[28,121,153,196],"22400+":[29],"in":[31,130],"total.":[32],"We":[33,111],"collect":[34],"the":[35,64,69,73,91,109,172,182,189],"data":[36,71],"from":[37,134,142],"YouTube":[38,70],"Podcast,":[40],"which":[41,198],"covers":[42],"variety":[44],"speaking":[46],"styles,":[47],"scenarios,":[48],"domains,":[49],"topics":[50],"noisy":[52],"conditions.":[53],"An":[54],"optical":[55],"character":[56],"recognition":[57,161,169],"(OCR)":[58],"method":[59],"is":[60,83,188],"introduced":[61],"to":[62,85,104],"generate":[63,86],"audio/text":[65,87],"segmentation":[66],"candidates":[67,89],"for":[68,90,123,127,136,145,157],"on":[72,171,201],"corresponding":[74],"video":[75],"subtitles,":[76],"while":[77],"ASR":[80],"transcription":[81],"system":[82],"used":[84],"pair":[88],"Podcast":[92],"data.":[93],"Then":[94],"propose":[96],"novel":[98],"end-to-end":[99],"label":[100],"error":[101],"detection":[102],"approach":[103],"further":[105],"validate":[106],"filter":[108],"candidates.":[110],"also":[112,177],"provide":[113],"three":[114,158,173],"manually":[115],"labelled":[116],"test":[118,174],"sets":[119,175],"along":[120],"WenetSpeech":[122,154,187],"evaluation":[124],"\u2013":[125],"Dev":[126],"cross-validation":[128],"purpose":[129],"training,":[131],"Test_Net,":[132],"collected":[133],"Internet":[135],"matched":[137],"test,":[138],"Test_Meeting,":[140],"recorded":[141],"real":[143],"meetings":[144],"more":[146],"challenging":[147],"mismatched":[148],"test.":[149],"Baseline":[150],"systems":[151],"trained":[152],"are":[155,176],"provided":[156,178],"popular":[159],"speech":[160,194,203],"toolkits,":[162],"namely":[163],"Kaldi,":[164],"ESPnet,":[165],"WeNet,":[167],"results":[170],"as":[179],"benchmarks.":[180],"To":[181],"best":[183],"our":[185],"knowledge,":[186],"current":[190],"largest":[191],"open-source":[192],"transcriptions,":[197],"benefits":[199],"research":[200],"production-level":[202],"recognition.":[204]},"counts_by_year":[{"year":2026,"cited_by_count":18},{"year":2025,"cited_by_count":37},{"year":2024,"cited_by_count":54},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":9}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
