{"id":"https://openalex.org/W2530876040","doi":"https://doi.org/10.1109/icassp.2017.7953077","title":"Very deep convolutional networks for end-to-end speech recognition","display_name":"Very deep convolutional networks for end-to-end speech recognition","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2530876040","doi":"https://doi.org/10.1109/icassp.2017.7953077","mag":"2530876040"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2017.7953077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100619178","display_name":"Zhang Yu","orcid":"https://orcid.org/0000-0003-2012-226X"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Massachusetts Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016918393","display_name":"William Chan","orcid":"https://orcid.org/0000-0002-0661-1764"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"William Chan","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112445699","display_name":"Navdeep Jaitly","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Navdeep Jaitly","raw_affiliation_strings":["Google Brain"],"affiliations":[{"raw_affiliation_string":"Google Brain","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100619178"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":67.83717279,"has_fulltext":false,"cited_by_count":432,"citation_normalized_percentile":{"value":0.99883299,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4845","last_page":"4849"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8346806764602661},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.7443455457687378},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6744369268417358},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6155167818069458},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6101767420768738},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5756527781486511},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5654782652854919},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5331003069877625},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5146964192390442},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4831249415874481},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4637710452079773},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4299308955669403},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4104993939399719},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16440466046333313},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.14610520005226135}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8346806764602661},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.7443455457687378},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6744369268417358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6155167818069458},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6101767420768738},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5756527781486511},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5654782652854919},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5331003069877625},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5146964192390442},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4831249415874481},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4637710452079773},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4299308955669403},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4104993939399719},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16440466046333313},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.14610520005226135},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2017.7953077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1026270304","https://openalex.org/W1485009520","https://openalex.org/W1522301498","https://openalex.org/W1554982972","https://openalex.org/W1686810756","https://openalex.org/W1828163288","https://openalex.org/W1836465849","https://openalex.org/W2005708641","https://openalex.org/W2064675550","https://openalex.org/W2097117768","https://openalex.org/W2099257174","https://openalex.org/W2102113734","https://openalex.org/W2108677974","https://openalex.org/W2112739286","https://openalex.org/W2112796928","https://openalex.org/W2133564696","https://openalex.org/W2160815625","https://openalex.org/W2180816288","https://openalex.org/W2188183693","https://openalex.org/W2193413348","https://openalex.org/W2194775991","https://openalex.org/W2198724430","https://openalex.org/W2327501763","https://openalex.org/W2514969556","https://openalex.org/W2515753980","https://openalex.org/W2950621961","https://openalex.org/W2962719052","https://openalex.org/W2962826786","https://openalex.org/W2962835968","https://openalex.org/W2962965465","https://openalex.org/W2963174142","https://openalex.org/W2963911037","https://openalex.org/W2964084166","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W6623517193","https://openalex.org/W6626481562","https://openalex.org/W6628877408","https://openalex.org/W6631190155","https://openalex.org/W6633336918","https://openalex.org/W6637373629","https://openalex.org/W6638205174","https://openalex.org/W6638444622","https://openalex.org/W6638667902","https://openalex.org/W6638749077","https://openalex.org/W6674758992","https://openalex.org/W6675365184","https://openalex.org/W6676315081","https://openalex.org/W6679434410","https://openalex.org/W6686002079","https://openalex.org/W6687566353"],"related_works":["https://openalex.org/W4289763776","https://openalex.org/W2810330923","https://openalex.org/W2916997151","https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W4221142855","https://openalex.org/W2050138804","https://openalex.org/W4398173524","https://openalex.org/W4290708361","https://openalex.org/W2129812225"],"abstract_inverted_index":{"Sequence-to-sequence":[0],"models":[1,11,61],"have":[2,12],"shown":[3],"success":[4],"in":[5,66],"end-to-end":[6,38],"speech":[7],"recognition.":[8],"However":[9],"these":[10],"only":[13],"used":[14],"shallow":[15],"acoustic":[16],"encoder":[17],"networks.":[18],"In":[19],"our":[20],"work,":[21],"we":[22],"successively":[23],"train":[24],"very":[25,54],"deep":[26,55,100],"convolutional":[27,50,58],"networks":[28],"to":[29,52],"add":[30,71],"more":[31],"expressive":[32],"power":[33],"and":[34,49,57,70,84],"better":[35],"generalization":[36],"for":[37],"ASR":[39,82],"models.":[40],"We":[41,77],"apply":[42],"network-in-network":[43],"principles,":[44],"batch":[45],"normalization,":[46],"residual":[47],"connections":[48],"LSTMs":[51],"build":[53],"recurrent":[56],"structures.":[59],"Our":[60],"exploit":[62],"the":[63,67,80],"spectral":[64],"structure":[65],"feature":[68],"space":[69],"computational":[72],"depth":[73],"without":[74,90],"overfitting":[75],"issues.":[76],"experiment":[78],"with":[79],"WSJ":[81],"task":[83],"achieve":[85],"10.5%":[86],"word":[87],"error":[88],"rate":[89],"any":[91],"dictionary":[92],"or":[93],"language":[94],"model":[95],"using":[96],"a":[97],"15":[98],"layer":[99],"network.":[101]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":27},{"year":2021,"cited_by_count":49},{"year":2020,"cited_by_count":76},{"year":2019,"cited_by_count":116},{"year":2018,"cited_by_count":78},{"year":2017,"cited_by_count":26},{"year":2016,"cited_by_count":1}],"updated_date":"2026-02-20T08:17:22.645390","created_date":"2025-10-10T00:00:00"}
