{"id":"https://openalex.org/W2963212833","doi":"https://doi.org/10.21437/interspeech.2015-648","title":"Long short-term memory based convolutional recurrent neural networks for large vocabulary speech recognition","display_name":"Long short-term memory based convolutional recurrent neural networks for large vocabulary speech recognition","publication_year":2015,"publication_date":"2015-09-06","ids":{"openalex":"https://openalex.org/W2963212833","doi":"https://doi.org/10.21437/interspeech.2015-648","mag":"2963212833"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2015-648","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2015-648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2015","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081173423","display_name":"Xiangang Li","orcid":"https://orcid.org/0000-0002-7810-1077"},"institutions":[{"id":"https://openalex.org/I4210142533","display_name":"Hearing, Speech & Deaf Center","ror":"https://ror.org/049ae8f38","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210142533"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Xiangang Li","raw_affiliation_strings":["Speech and Hearing Research Center,","Key Laboratory of Machine Perception (Ministry of Education), Peking University, Beijing, 100871"],"affiliations":[{"raw_affiliation_string":"Speech and Hearing Research Center,","institution_ids":["https://openalex.org/I4210142533"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception (Ministry of Education), Peking University, Beijing, 100871","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084685506","display_name":"Xihong Wu","orcid":"https://orcid.org/0009-0004-5236-7469"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210142533","display_name":"Hearing, Speech & Deaf Center","ror":"https://ror.org/049ae8f38","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210142533"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Xihong Wu","raw_affiliation_strings":["Key Laboratory of Machine Perception (Ministry of Education), Peking University, Beijing, 100871","Speech and Hearing Research Center,"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception (Ministry of Education), Peking University, Beijing, 100871","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Speech and Hearing Research Center,","institution_ids":["https://openalex.org/I4210142533"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5081173423"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I4210142533"],"apc_list":null,"apc_paid":null,"fwci":0.4451,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.80915678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3219","last_page":"3223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.9013632535934448},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8537173271179199},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7154847383499146},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6243239641189575},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6022351980209351},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5869744420051575},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5304229855537415},{"id":"https://openalex.org/keywords/time-delay-neural-network","display_name":"Time delay neural network","score":0.4823398292064667},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.479618638753891},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4391142427921295},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4349032938480377},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.43165016174316406},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3785012662410736},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0550616979598999}],"concepts":[{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.9013632535934448},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8537173271179199},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7154847383499146},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6243239641189575},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6022351980209351},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5869744420051575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5304229855537415},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.4823398292064667},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.479618638753891},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4391142427921295},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4349032938480377},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.43165016174316406},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3785012662410736},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0550616979598999},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2015-648","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2015-648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2015","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1499864241","https://openalex.org/W1526236009","https://openalex.org/W1685006559","https://openalex.org/W1815076433","https://openalex.org/W1886418050","https://openalex.org/W1968419113","https://openalex.org/W1993409002","https://openalex.org/W1993882792","https://openalex.org/W1995562189","https://openalex.org/W2005708641","https://openalex.org/W2035424729","https://openalex.org/W2062227835","https://openalex.org/W2075925017","https://openalex.org/W2079623482","https://openalex.org/W2087402357","https://openalex.org/W2089917322","https://openalex.org/W2107878631","https://openalex.org/W2112739286","https://openalex.org/W2114016253","https://openalex.org/W2115730999","https://openalex.org/W2116261113","https://openalex.org/W2116456623","https://openalex.org/W2131342762","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2155273149","https://openalex.org/W2160815625","https://openalex.org/W2293009711","https://openalex.org/W2293634267","https://openalex.org/W2403195671","https://openalex.org/W2403787182","https://openalex.org/W2962719052","https://openalex.org/W2963574257","https://openalex.org/W3102751229"],"related_works":["https://openalex.org/W4225394202","https://openalex.org/W4298287631","https://openalex.org/W2109916967","https://openalex.org/W2950022897","https://openalex.org/W2890297197","https://openalex.org/W2101697354","https://openalex.org/W1538193578","https://openalex.org/W3206636855","https://openalex.org/W1538606284","https://openalex.org/W2373874059"],"abstract_inverted_index":{"Long":[0],"short-term":[1],"memory":[2],"(LSTM)":[3],"recurrent":[4,75],"neural":[5,41,51,76],"networks":[6,42,52],"(RNNs)":[7],"have":[8,44],"been":[9],"shown":[10],"to":[11,24,48,58],"give":[12],"state-of-the-art":[13],"performance":[14],"on":[15,118],"many":[16],"speech":[17,92,151],"recognition":[18,152],"tasks,":[19],"as":[20,54,73,100],"they":[21,55],"are":[22,56],"able":[23,57],"provide":[25],"the":[26,36,39,64,83,88,108,123,133,145],"learned":[27],"dynamically":[28],"changing":[29],"contextual":[30],"window":[31],"of":[32,103,140],"all":[33],"sequence":[34],"history.On":[35],"other":[37],"hand,":[38],"convolutional":[40,74],"(CNNs)":[43],"brought":[45],"significant":[46],"improvements":[47],"deep":[49],"feed-forward":[50],"(FFNNs),":[53],"better":[59],"reduce":[60],"spectral":[61],"variation":[62],"in":[63],"input":[65],"signal.In":[66],"this":[67],"paper,":[68],"a":[69,101,113],"network":[70,77,115],"architecture":[71],"called":[72],"(CRNN)":[78],"is":[79,98,116],"proposed":[80,89,134],"by":[81],"combining":[82],"CNN":[84],"and":[85,111,127,132],"LSTM":[86,114,130,135,146],"RNN.In":[87],"CRNNs,":[90],"each":[91,119],"frame,":[93],"without":[94],"adjacent":[95],"context":[96],"frames,":[97],"organized":[99],"number":[102,139],"local":[104],"feature":[105,120],"patches":[106],"along":[107,122],"frequency":[109],"axis,":[110],"then":[112],"performed":[117],"patch":[121],"time":[124],"axis.We":[125],"train":[126],"compare":[128],"FFNNs,":[129],"RNNs":[131],"CRNNs":[136,147],"at":[137],"various":[138],"configurations.Experimental":[141],"results":[142],"show":[143],"that":[144],"can":[148],"exceed":[149],"stateof-the-art":[150],"performance.":[153]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
