{"id":"https://openalex.org/W4221167707","doi":"https://doi.org/10.21437/interspeech.2022-483","title":"WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit","display_name":"WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4221167707","doi":"https://doi.org/10.21437/interspeech.2022-483"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-483","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-483","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100449716","display_name":"Binbin Zhang","orcid":"https://orcid.org/0000-0002-7164-5127"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Binbin Zhang","raw_affiliation_strings":["WeNet Open Source Community","Horizon Robotics, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037740394","display_name":"Di Wu","orcid":"https://orcid.org/0000-0001-9775-8026"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Horizon Robotics, Beijing, China","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041630666","display_name":"Zhendong Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Peng","raw_affiliation_strings":["WeNet Open Source Community","Horizon Robotics, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074833963","display_name":"Xingchen Song","orcid":"https://orcid.org/0009-0009-9516-5361"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingchen Song","raw_affiliation_strings":["Horizon Robotics, Beijing, China","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109482068","display_name":"Zhuoyuan Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuoyuan Yao","raw_affiliation_strings":["Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100751610","display_name":"Hang Lv","orcid":"https://orcid.org/0000-0003-3761-1684"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Lv","raw_affiliation_strings":["Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066245750","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-9051-2111"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083993732","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0002-9658-050X"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yang","raw_affiliation_strings":["Horizon Robotics, Beijing, China","WeNet Open Source Community"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031150833","display_name":"Fuping Pan","orcid":"https://orcid.org/0000-0001-9171-0726"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuping Pan","raw_affiliation_strings":["Horizon Robotics, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053316728","display_name":"Jianwei Niu","orcid":"https://orcid.org/0000-0003-3946-5107"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Niu","raw_affiliation_strings":["Horizon Robotics, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100449716"],"corresponding_institution_ids":["https://openalex.org/I4401726824"],"apc_list":null,"apc_paid":null,"fwci":9.6712,"has_fulltext":false,"cited_by_count":97,"citation_normalized_percentile":{"value":0.98712242,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1661","last_page":"1665"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.7725163698196411},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.658038854598999},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6259312629699707},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24803531169891357}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.7725163698196411},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.658038854598999},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6259312629699707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24803531169891357}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-483","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-483","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1586532344","https://openalex.org/W1828163288","https://openalex.org/W2125529971","https://openalex.org/W2327501763","https://openalex.org/W2403440562","https://openalex.org/W2507132449","https://openalex.org/W2526425061","https://openalex.org/W2750499125","https://openalex.org/W2889048668","https://openalex.org/W2936774411","https://openalex.org/W2953384591","https://openalex.org/W2962760690","https://openalex.org/W2962780374","https://openalex.org/W2963242190","https://openalex.org/W2973122799","https://openalex.org/W3006889321","https://openalex.org/W3011207290","https://openalex.org/W3011339933","https://openalex.org/W3097777922","https://openalex.org/W3097794466","https://openalex.org/W3144345593","https://openalex.org/W3151526698","https://openalex.org/W3167533889","https://openalex.org/W4287120025","https://openalex.org/W4288072840","https://openalex.org/W4291566970","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2368779261","https://openalex.org/W2778699561","https://openalex.org/W2971982237","https://openalex.org/W2938947737","https://openalex.org/W2312526371","https://openalex.org/W2954695182","https://openalex.org/W4224917401","https://openalex.org/W4303519918","https://openalex.org/W2600628583","https://openalex.org/W2771275309"],"abstract_inverted_index":{"Recently,":[0],"we":[1,47],"made":[2],"available":[3,183],"WeNet":[4,49,105,164,177],"[1],":[5],"a":[6,14,20,32,58,73,101,119,150],"production-oriented":[7,186],"end-to-end":[8],"speech":[9],"recognition":[10,171],"toolkit,":[11],"which":[12,66,124],"introduces":[13],"unified":[15,59,120,151],"two-pass":[16,60],"(U2)":[17],"framework":[18,61],"and":[19,27,39,86,100,138,145,181],"built-in":[21],"runtime":[22],"to":[23,77,131,153,168],"address":[24],"the":[25,68,79,83,87,90,108,162,175],"streaming":[26],"non-streaming":[28],"decoding":[29],"modes":[30],"in":[31,44,114,142],"single":[33],"model.To":[34],"further":[35],"improve":[36,78],"ASR":[37,140],"performance":[38,88,172],"facilitate":[40],"various":[41,179],"production":[42,115,137],"requirements,":[43],"this":[45],"paper,":[46],"present":[48],"2.0":[50,165],"with":[51,62],"four":[52],"important":[53,185],"updates.(1)":[54],"We":[55,93,117,148],"propose":[56],"U2++,":[57],"bidirectional":[63],"attention":[64,75],"decoders,":[65],"includes":[67],"future":[69],"contextual":[70,121],"information":[71],"by":[72],"right-toleft":[74],"decoder":[76,103],"representative":[80],"ability":[81,135],"of":[82,110],"shared":[84],"encoder":[85],"during":[89],"rescoring":[91],"stage.(2)":[92],"introduce":[94],"an":[95],"n-gram":[96],"based":[97],"language":[98],"model":[99,159],"WFSTbased":[102],"into":[104],"2.0,":[106],"promoting":[107],"use":[109],"rich":[111],"text":[112],"data":[113,156],"scenarios.(3)":[116],"design":[118,149],"biasing":[122],"framework,":[123],"leverages":[125],"user-specific":[126],"context":[127],"(e.g.,":[128],"contact":[129],"lists)":[130],"provide":[132],"rapid":[133],"adaptation":[134],"for":[136,157],"improves":[139],"accuracy":[141],"both":[143],"with-LM":[144],"without-LM":[146],"scenarios.(4)":[147],"IO":[152],"support":[154],"large-scale":[155],"effective":[158],"training.In":[160],"summary,":[161],"brand-new":[163],"achieves":[166],"up":[167],"10%":[169],"relative":[170],"improvement":[173],"over":[174],"original":[176],"on":[178],"corpora":[180],"makes":[182],"several":[184],"features.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":20},{"year":2023,"cited_by_count":31},{"year":2022,"cited_by_count":6}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
