{"id":"https://openalex.org/W3015537910","doi":"https://doi.org/10.1109/icassp40776.2020.9053889","title":"Quartznet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions","display_name":"Quartznet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015537910","doi":"https://doi.org/10.1109/icassp40776.2020.9053889","mag":"3015537910"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053889","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053889","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039963047","display_name":"Samuel Kriman","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Samuel Kriman","raw_affiliation_strings":["Univ. of Illinois Urbana-Champaign, NVIDIA, USA"],"affiliations":[{"raw_affiliation_string":"Univ. of Illinois Urbana-Champaign, NVIDIA, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064449185","display_name":"Stanislav Beliaev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stanislav Beliaev","raw_affiliation_strings":["High School of Economics, Univ. of Saint Petersburg, NVIDIA, USA"],"affiliations":[{"raw_affiliation_string":"High School of Economics, Univ. of Saint Petersburg, NVIDIA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032957280","display_name":"Boris Ginsburg","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boris Ginsburg","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036658158","display_name":"Jocelyn Huang","orcid":"https://orcid.org/0009-0008-4107-0431"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jocelyn Huang","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048403564","display_name":"Oleksii Kuchaiev","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oleksii Kuchaiev","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026088310","display_name":"Vitaly Lavrukhin","orcid":"https://orcid.org/0009-0006-7866-8301"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vitaly Lavrukhin","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090541149","display_name":"R. Bret Leary","orcid":"https://orcid.org/0000-0003-3771-8465"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Leary","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100762970","display_name":"Jason Li","orcid":"https://orcid.org/0000-0002-1150-3549"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Li","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100354700","display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0002-6064-0777"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5039963047"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":28.679,"has_fulltext":false,"cited_by_count":278,"citation_normalized_percentile":{"value":0.99726879,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"6124","last_page":"6128"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8256876468658447},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7309674024581909},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6678241491317749},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6549246907234192},{"id":"https://openalex.org/keywords/separable-space","display_name":"Separable space","score":0.6436764597892761},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6284343600273132},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5777245163917542},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.559398889541626},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5469642877578735},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.49197670817375183},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4580293297767639},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4296590983867645},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4219611585140228},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.358474999666214},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3579689860343933},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.289905846118927},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11657789349555969},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09916222095489502}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8256876468658447},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7309674024581909},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6678241491317749},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6549246907234192},{"id":"https://openalex.org/C70710897","wikidata":"https://www.wikidata.org/wiki/Q680081","display_name":"Separable space","level":2,"score":0.6436764597892761},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6284343600273132},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5777245163917542},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.559398889541626},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5469642877578735},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.49197670817375183},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4580293297767639},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4296590983867645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4219611585140228},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.358474999666214},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3579689860343933},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.289905846118927},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11657789349555969},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09916222095489502},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053889","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053889","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6899999976158142,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1836465849","https://openalex.org/W2024490156","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2407080277","https://openalex.org/W2502312327","https://openalex.org/W2531409750","https://openalex.org/W2577366047","https://openalex.org/W2612445135","https://openalex.org/W2746314669","https://openalex.org/W2763421725","https://openalex.org/W2781384251","https://openalex.org/W2904818793","https://openalex.org/W2936774411","https://openalex.org/W2945697643","https://openalex.org/W2949117887","https://openalex.org/W2955425717","https://openalex.org/W2963071736","https://openalex.org/W2963125010","https://openalex.org/W2963163009","https://openalex.org/W2964110616","https://openalex.org/W2972630480","https://openalex.org/W2973215447","https://openalex.org/W2974231335","https://openalex.org/W2977728428","https://openalex.org/W3006827623","https://openalex.org/W4297775537","https://openalex.org/W4297818305","https://openalex.org/W6629717138","https://openalex.org/W6638667902","https://openalex.org/W6675365184","https://openalex.org/W6713762819","https://openalex.org/W6724804524","https://openalex.org/W6732447497","https://openalex.org/W6737664043","https://openalex.org/W6743428213","https://openalex.org/W6745245109","https://openalex.org/W6747270024","https://openalex.org/W6749954789","https://openalex.org/W6757424787","https://openalex.org/W6762625080","https://openalex.org/W6762718338","https://openalex.org/W6767671539","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W2953716828","https://openalex.org/W2904857019","https://openalex.org/W4297807321","https://openalex.org/W3158431807","https://openalex.org/W2530952058","https://openalex.org/W2582836483","https://openalex.org/W4299366318","https://openalex.org/W4297580547","https://openalex.org/W2951583185","https://openalex.org/W4308155352"],"abstract_inverted_index":{"We":[0,70],"propose":[1],"a":[2],"new":[3,81],"end-to-end":[4],"neural":[5],"acoustic":[6],"model":[7,13,75],"for":[8],"automatic":[9],"speech":[10],"recognition.":[11],"The":[12,49],"is":[14,44],"composed":[15],"of":[16,27],"multiple":[17],"blocks":[18],"with":[19,32,46],"residual":[20],"connections":[21],"between":[22],"them.":[23],"Each":[24],"block":[25],"consists":[26],"one":[28],"or":[29],"more":[30],"modules":[31],"1D":[33],"time-channel":[34],"separable":[35],"convolutional":[36],"layers,":[37],"batch":[38],"normalization,":[39],"and":[40,58],"ReLU":[41],"layers.":[42],"It":[43],"trained":[45],"CTC":[47],"loss.":[48],"proposed":[50],"network":[51],"achieves":[52],"near":[53],"state-of-the-art":[54],"accuracy":[55],"on":[56,80],"LibriSpeech":[57],"Wall":[59],"Street":[60],"Journal,":[61],"while":[62],"having":[63],"fewer":[64],"parameters":[65],"than":[66],"all":[67],"competing":[68],"models.":[69],"also":[71],"demonstrate":[72],"that":[73],"this":[74],"can":[76],"be":[77],"effectively":[78],"fine-tuned":[79],"datasets.":[82]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":31},{"year":2024,"cited_by_count":32},{"year":2023,"cited_by_count":59},{"year":2022,"cited_by_count":43},{"year":2021,"cited_by_count":85},{"year":2020,"cited_by_count":24}],"updated_date":"2026-04-11T08:14:18.477133","created_date":"2025-10-10T00:00:00"}
