{"id":"https://openalex.org/W4391021755","doi":"https://doi.org/10.1109/asru57964.2023.10389648","title":"TorchAudio 2.1: Advancing Speech Recognition, Self-Supervised Learning, and Audio Processing Components for Pytorch","display_name":"TorchAudio 2.1: Advancing Speech Recognition, Self-Supervised Learning, and Audio Processing Components for Pytorch","publication_year":2023,"publication_date":"2023-12-16","ids":{"openalex":"https://openalex.org/W4391021755","doi":"https://doi.org/10.1109/asru57964.2023.10389648"},"language":"en","primary_location":{"id":"doi:10.1109/asru57964.2023.10389648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021004964","display_name":"Jeff Hwang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jeff Hwang","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025983396","display_name":"Moto Hira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moto Hira","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010792181","display_name":"Caroline Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caroline Chen","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100724974","display_name":"Xiaohui Zhang","orcid":"https://orcid.org/0000-0003-0245-6792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaohui Zhang","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031088292","display_name":"Zhaoheng Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhaoheng Ni","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077772423","display_name":"Guangzhi Sun","orcid":"https://orcid.org/0000-0002-5886-056X"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Guangzhi Sun","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001790767","display_name":"Pingchuan Ma","orcid":"https://orcid.org/0000-0003-3752-0803"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pingchuan Ma","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108823342","display_name":"Ruizhe Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruizhe Huang","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028944337","display_name":"Vineel Pratap","orcid":"https://orcid.org/0000-0003-3333-910X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vineel Pratap","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074415592","display_name":"Yuekai Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuekai Zhang","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080751032","display_name":"Anurag Kumar","orcid":"https://orcid.org/0000-0002-1164-144X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anurag Kumar","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024155095","display_name":"Chin-Yun Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chin-Yun Yu","raw_affiliation_strings":["Queen Mary University of London"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021302609","display_name":"Chuang Zhu","orcid":"https://orcid.org/0000-0001-5155-7069"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chuang Zhu","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069296252","display_name":"Chunxi Liu","orcid":"https://orcid.org/0000-0001-5441-9374"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chunxi Liu","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027275703","display_name":"Jacob Kahn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jacob Kahn","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040811098","display_name":"Mirco Ravanelli","orcid":"https://orcid.org/0000-0002-3929-5526"},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mirco Ravanelli","raw_affiliation_strings":["Concordia University"],"affiliations":[{"raw_affiliation_string":"Concordia University","institution_ids":["https://openalex.org/I60158472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103111009","display_name":"Peng Sun","orcid":"https://orcid.org/0000-0003-3268-8092"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng Sun","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004966636","display_name":"Yangyang Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yangyang Shi","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081330002","display_name":"Yumeng Tao","orcid":"https://orcid.org/0000-0002-6225-049X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yumeng Tao","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":20,"corresponding_author_ids":["https://openalex.org/A5021004964"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.928,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.96355649,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8437906503677368},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5511573553085327},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5045913457870483},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4956406354904175},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.4937330186367035},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.41465485095977783},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40899786353111267},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.33883386850357056}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8437906503677368},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5511573553085327},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5045913457870483},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4956406354904175},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.4937330186367035},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.41465485095977783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40899786353111267},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.33883386850357056},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru57964.2023.10389648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru57964.2023.10389648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5799999833106995}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W2127141656","https://openalex.org/W2191779130","https://openalex.org/W2219249508","https://openalex.org/W2402583539","https://openalex.org/W2517616541","https://openalex.org/W2591804103","https://openalex.org/W2808631503","https://openalex.org/W2891205112","https://openalex.org/W2933138175","https://openalex.org/W2936774411","https://openalex.org/W2953190524","https://openalex.org/W2962780374","https://openalex.org/W2974231335","https://openalex.org/W2982471419","https://openalex.org/W3004309045","https://openalex.org/W3035965352","https://openalex.org/W3036601975","https://openalex.org/W3043783436","https://openalex.org/W3096408984","https://openalex.org/W3097777922","https://openalex.org/W3099878876","https://openalex.org/W3162341667","https://openalex.org/W3162665866","https://openalex.org/W3163217847","https://openalex.org/W3163793923","https://openalex.org/W3167533889","https://openalex.org/W3197580070","https://openalex.org/W3198429080","https://openalex.org/W3208743843","https://openalex.org/W3208887030","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3213726885","https://openalex.org/W4221149170","https://openalex.org/W4221153068","https://openalex.org/W4225270933","https://openalex.org/W4225310539","https://openalex.org/W4226300471","https://openalex.org/W4286895682","https://openalex.org/W4289665794","https://openalex.org/W4297841641","https://openalex.org/W4297841719","https://openalex.org/W4311553423","https://openalex.org/W4372260337","https://openalex.org/W4372346152","https://openalex.org/W4378105483","https://openalex.org/W4385245566","https://openalex.org/W4385822542","https://openalex.org/W4385822854","https://openalex.org/W4385823192","https://openalex.org/W6679855610","https://openalex.org/W6688816777","https://openalex.org/W6739901393","https://openalex.org/W6754420807","https://openalex.org/W6767671539","https://openalex.org/W6780218876","https://openalex.org/W6809739816","https://openalex.org/W6810168380","https://openalex.org/W6847652939","https://openalex.org/W6852909395"],"related_works":["https://openalex.org/W1834994814","https://openalex.org/W2981428355","https://openalex.org/W1599055764","https://openalex.org/W2041273198","https://openalex.org/W2149163000","https://openalex.org/W2962858469","https://openalex.org/W2131711534","https://openalex.org/W2289873871","https://openalex.org/W2559040841","https://openalex.org/W114661351"],"abstract_inverted_index":{"TorchAudio":[0],"is":[1],"an":[2],"open-source":[3],"audio":[4,21],"and":[5,18,22,29,43,56,58,73,82,89,98,115],"speech":[6,23,79,96,100],"processing":[7],"library":[8],"built":[9],"for":[10,91],"PyTorch.":[11],"It":[12],"aims":[13],"to":[14,39],"accelerate":[15],"the":[16],"research":[17],"development":[19,54],"of":[20,105],"technologies":[24],"by":[25,46],"providing":[26],"well-designed,":[27],"easy-to-use,":[28],"performant":[30],"PyTorch":[31],"components.":[32],"Its":[33],"contributors":[34],"routinely":[35],"engage":[36],"with":[37],"users":[38],"understand":[40],"their":[41,113],"needs":[42],"fulfill":[44],"them":[45],"developing":[47],"impactful":[48],"features.":[49],"Here,":[50],"we":[51,62,111],"survey":[52],"TorchAudio\u2019s":[53],"principles":[55],"contents":[57],"highlight":[59],"key":[60],"features":[61],"include":[63],"in":[64],"its":[65],"latest":[66],"version":[67],"(2.1):":[68],"self-supervised":[69],"learning":[70],"pre-trained":[71],"pipelines":[72],"training":[74,83],"recipes,":[75,84],"high-performance":[76],"CTC":[77],"decoders,":[78],"recognition":[80],"models":[81],"advanced":[85],"media":[86],"I/O":[87],"capabilities,":[88],"tools":[90],"performing":[92],"forced":[93],"alignment,":[94],"multi-channel":[95],"enhancement,":[97],"reference-less":[99],"assessment.":[101],"For":[102],"a":[103],"selection":[104],"these":[106],"features,":[107],"through":[108],"empirical":[109],"studies,":[110],"demonstrate":[112],"efficacy":[114],"show":[116],"that":[117],"they":[118],"achieve":[119],"competitive":[120],"or":[121],"state-of-the-art":[122],"performance.":[123]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":8}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
