{"id":"https://openalex.org/W3150269179","doi":"https://doi.org/10.1109/slt48900.2021.9383532","title":"Multi-Quartznet: Multi-Resolution Convolution for Speech Recognition with Multi-Layer Feature Fusion","display_name":"Multi-Quartznet: Multi-Resolution Convolution for Speech Recognition with Multi-Layer Feature Fusion","publication_year":2021,"publication_date":"2021-01-19","ids":{"openalex":"https://openalex.org/W3150269179","doi":"https://doi.org/10.1109/slt48900.2021.9383532","mag":"3150269179"},"language":"en","primary_location":{"id":"doi:10.1109/slt48900.2021.9383532","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076982760","display_name":"Jian Luo","orcid":"https://orcid.org/0000-0002-9756-3066"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Luo","raw_affiliation_strings":["Ping An Technology (Shenzhen) Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology (Shenzhen) Co., Ltd","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074472751","display_name":"Jianzong Wang","orcid":"https://orcid.org/0000-0002-9237-4231"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Jianzong Wang","raw_affiliation_strings":["Ping An Technology (Shenzhen) Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology (Shenzhen) Co., Ltd","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101977180","display_name":"Ning Cheng","orcid":"https://orcid.org/0000-0002-0988-5023"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Ning Cheng","raw_affiliation_strings":["Ping An Technology (Shenzhen) Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology (Shenzhen) Co., Ltd","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011273813","display_name":"Guilin Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Guilin Jiang","raw_affiliation_strings":["Ping An Technology (Shenzhen) Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology (Shenzhen) Co., Ltd","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016038454","display_name":"Jing Xiao","orcid":"https://orcid.org/0000-0001-9615-4749"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Jing Xiao","raw_affiliation_strings":["Ping An Technology (Shenzhen) Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology (Shenzhen) Co., Ltd","institution_ids":["https://openalex.org/I4401726822"]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1195,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81839392,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"82","last_page":"88"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7785665988922119},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.762558102607727},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.6963061094284058},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6294016242027283},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6275137662887573},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5897232890129089},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.552468478679657},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5441149473190308},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5419481992721558},{"id":"https://openalex.org/keywords/convolutional-code","display_name":"Convolutional code","score":0.5166507363319397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48769745230674744},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44326385855674744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41833022236824036},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3418131470680237},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.2251589596271515},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1488056182861328},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10306134819984436},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08550375699996948}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7785665988922119},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.762558102607727},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.6963061094284058},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6294016242027283},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6275137662887573},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5897232890129089},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.552468478679657},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5441149473190308},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5419481992721558},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.5166507363319397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48769745230674744},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44326385855674744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41833022236824036},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3418131470680237},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2251589596271515},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1488056182861328},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10306134819984436},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08550375699996948},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt48900.2021.9383532","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5600000023841858}],"awards":[],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1600744878","https://openalex.org/W1922655562","https://openalex.org/W2193413348","https://openalex.org/W2518108298","https://openalex.org/W2520160253","https://openalex.org/W2565639579","https://openalex.org/W2752782242","https://openalex.org/W2884585870","https://openalex.org/W2888907095","https://openalex.org/W2899640612","https://openalex.org/W2904818793","https://openalex.org/W2939111082","https://openalex.org/W2945697643","https://openalex.org/W2963242190","https://openalex.org/W2963263347","https://openalex.org/W2963420686","https://openalex.org/W2963446712","https://openalex.org/W2965116050","https://openalex.org/W2972630480","https://openalex.org/W2972818416","https://openalex.org/W2973201641","https://openalex.org/W2973215447","https://openalex.org/W2976556660","https://openalex.org/W2977728428","https://openalex.org/W2982427813","https://openalex.org/W2989478250","https://openalex.org/W2990857353","https://openalex.org/W2995025901","https://openalex.org/W3006827623","https://openalex.org/W3007328579","https://openalex.org/W3015537910","https://openalex.org/W3015671919","https://openalex.org/W3016042429","https://openalex.org/W3016221831","https://openalex.org/W3023126978","https://openalex.org/W3034307881","https://openalex.org/W3153471465","https://openalex.org/W6640090968","https://openalex.org/W6687566353","https://openalex.org/W6725739302","https://openalex.org/W6726497184","https://openalex.org/W6727336983","https://openalex.org/W6730903564","https://openalex.org/W6743731764","https://openalex.org/W6756137172","https://openalex.org/W6757424787","https://openalex.org/W6762625080","https://openalex.org/W6764150997","https://openalex.org/W6769645241","https://openalex.org/W6770384703","https://openalex.org/W6771312958","https://openalex.org/W6773166886","https://openalex.org/W6777158299","https://openalex.org/W6794034758"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W147410782","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W3022252430","https://openalex.org/W4287804464","https://openalex.org/W3103989898","https://openalex.org/W2810679507","https://openalex.org/W2964954556"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"an":[5],"end-to-end":[6],"speech":[7],"recognition":[8],"network":[9],"based":[10],"on":[11,49,91],"Nvidia's":[12],"previous":[13],"QuartzNet":[14,98],"[1]":[15],"model.":[16],"We":[17],"try":[18],"to":[19,102],"promote":[20],"the":[21,33,57],"model":[22,87],"performance,":[23],"and":[24,99],"design":[25],"three":[26],"components:":[27],"(1)":[28],"Multi-Resolution":[29],"Convolution":[30],"Module,":[31,55,72],"re-places":[32],"original":[34,97],"1D":[35],"time-channel":[36],"separable":[37],"convolution":[38],"with":[39],"multi-stream":[40],"convolutions.":[41],"Each":[42],"stream":[43,63],"has":[44],"a":[45],"unique":[46],"dilated":[47],"stride":[48],"convolutional":[50,62,75],"operations.":[51],"(2)":[52],"Channel-Wise":[53],"Attention":[54],"calculates":[56],"attention":[58],"weight":[59],"of":[60],"each":[61,74],"by":[64,77],"spatial":[65],"channel-wise":[66],"pooling.":[67],"(3)":[68],"Multi-Layer":[69],"Feature":[70],"Fusion":[71],"reweights":[73],"block":[76],"global":[78],"multi-layer":[79],"feature":[80],"maps.":[81],"Our":[82],"experiments":[83],"demonstrate":[84],"that":[85],"Multi-QuartzNet":[86],"achieves":[88],"CER":[89],"6.77%":[90],"AISHELL-1":[92],"data":[93],"set,":[94],"which":[95],"outperforms":[96],"is":[100],"close":[101],"state-of-art":[103],"result.":[104]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
