{"id":"https://openalex.org/W2112984492","doi":"https://doi.org/10.1109/icassp.2012.6288897","title":"Exploiting sparseness in deep neural networks for large vocabulary speech recognition","display_name":"Exploiting sparseness in deep neural networks for large vocabulary speech recognition","publication_year":2012,"publication_date":"2012-03-01","ids":{"openalex":"https://openalex.org/W2112984492","doi":"https://doi.org/10.1109/icassp.2012.6288897","mag":"2112984492"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2012.6288897","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2012.6288897","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Microsoft Research, Redmond, USA","Microsoft Research, , Redmond, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Research, , Redmond, USA#TAB#","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072932051","display_name":"Frank Seide","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Frank Seide","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100438769","display_name":"Gang Li","orcid":"https://orcid.org/0000-0003-1583-641X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Li","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100671324","display_name":"Li Deng","orcid":"https://orcid.org/0000-0002-1014-0790"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Deng","raw_affiliation_strings":["Microsoft Research, Redmond, USA","Microsoft Research, , Redmond, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Research, , Redmond, USA#TAB#","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034476404"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":21.2156,"has_fulltext":false,"cited_by_count":141,"citation_normalized_percentile":{"value":0.99488217,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4409","last_page":"4412"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8164170980453491},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5567895770072937},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5386296510696411},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5375248193740845},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5225160121917725},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4882986545562744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.485705703496933},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.48048606514930725},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46103572845458984},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4577452540397644},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.45769625902175903},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.45555102825164795},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4281243681907654},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4096321761608124},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3527657389640808},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18171215057373047},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09239760041236877}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8164170980453491},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5567895770072937},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5386296510696411},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5375248193740845},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5225160121917725},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4882986545562744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.485705703496933},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48048606514930725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46103572845458984},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4577452540397644},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.45769625902175903},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.45555102825164795},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4281243681907654},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4096321761608124},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3527657389640808},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18171215057373047},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09239760041236877},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp.2012.6288897","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2012.6288897","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.368.3816","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.368.3816","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.microsoft.com/pubs/157584/DNN-Sparse-ICASSP2012.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2100495367","https://openalex.org/W2114766824","https://openalex.org/W2125389748","https://openalex.org/W2139432235","https://openalex.org/W2147768505","https://openalex.org/W2160306971","https://openalex.org/W2169189000","https://openalex.org/W2169434751","https://openalex.org/W2394932179","https://openalex.org/W2616657226","https://openalex.org/W2725061391","https://openalex.org/W6677103964","https://openalex.org/W6678583879","https://openalex.org/W6680579697","https://openalex.org/W6685119187","https://openalex.org/W6696569386","https://openalex.org/W6711962127","https://openalex.org/W6803034309"],"related_works":["https://openalex.org/W1574295218","https://openalex.org/W2159208002","https://openalex.org/W2150890698","https://openalex.org/W3133710586","https://openalex.org/W2038107365","https://openalex.org/W4245698648","https://openalex.org/W1748295792","https://openalex.org/W2112039799","https://openalex.org/W2080758042","https://openalex.org/W1487851247"],"abstract_inverted_index":{"Recently,":[0],"we":[1,41,166],"developed":[2],"context-dependent":[3],"deep":[4],"neural":[5],"network":[6],"(DNN)":[7],"hidden":[8],"Markov":[9],"models":[10],"for":[11,48],"large":[12],"vocabulary":[13],"speech":[14,160],"recognition.":[15],"While":[16],"reducing":[17,127],"errors":[18],"by":[19,56,131],"33%":[20],"compared":[21],"to":[22,90,96,123,148,172,178],"its":[23],"discriminatively":[24],"trained":[25],"Gaussian-mixture":[26],"counterpart":[27],"on":[28,46,109,138,153,183],"the":[29,62,79,92,110,118,128,134,154,169],"switchboard":[30,113],"benchmark":[31],"task,":[32],"DNN":[33,47],"requires":[34],"much":[35],"more":[36],"parameters.":[37],"In":[38],"this":[39],"paper,":[40],"report":[42],"our":[43],"recent":[44],"work":[45],"improved":[49],"generalization,":[50],"model":[51,98,137,170],"size,":[52],"and":[53,70,75,100,112,151,174,176,180],"computation":[54,101,177],"speed":[55],"exploiting":[57],"parameter":[58],"sparseness.":[59],"We":[60,84],"formulate":[61],"goal":[63],"of":[64,120],"enforcing":[65],"sparseness":[66,94],"as":[67],"soft":[68],"regularization":[69],"convex":[71],"constraint":[72],"optimization":[73],"problems,":[74],"propose":[76,86],"solutions":[77,105],"under":[78],"stochastic":[80],"gradient":[81],"ascent":[82],"setting.":[83],"also":[85],"novel":[87],"data":[88],"structures":[89],"exploit":[91],"random":[93],"patterns":[95],"reduce":[97,168],"size":[99,171],"time.":[102],"The":[103,141],"proposed":[104],"have":[106,116,144],"been":[107,145],"evaluated":[108],"voice-search":[111],"datasets.":[114,140,186],"They":[115],"decreased":[117],"number":[119],"nonzero":[121,142],"connections":[122,143],"one":[124],"third":[125],"while":[126],"error":[129],"rate":[130],"0.2-0.3%":[132],"over":[133],"fully":[135],"connected":[136],"both":[139],"further":[146],"reduced":[147],"only":[149],"12%":[150],"19%":[152],"two":[155,185],"respective":[156],"datasets":[157],"without":[158],"sacrificing":[159],"recognition":[161],"performance.":[162],"Under":[163],"these":[164,184],"conditions":[165],"can":[167],"18%":[173],"29%,":[175],"14%":[179],"23%,":[181],"respectively,":[182]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":25},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":16},{"year":2013,"cited_by_count":13},{"year":2012,"cited_by_count":7}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
