{"id":"https://openalex.org/W2094233035","doi":"https://doi.org/10.1109/icassp.2014.6853593","title":"On parallelizability of stochastic gradient descent for speech DNNS","display_name":"On parallelizability of stochastic gradient descent for speech DNNS","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W2094233035","doi":"https://doi.org/10.1109/icassp.2014.6853593","mag":"2094233035"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2014.6853593","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853593","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072932051","display_name":"Frank Seide","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Frank Seide","raw_affiliation_strings":["Microsoft Research Asia, Beijing, P.R.C","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, P.R.C","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101611862","display_name":"Hao Fu","orcid":"https://orcid.org/0000-0002-5199-862X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Fu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, P.R.C","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, P.R.C","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012153296","display_name":"Jasha Droppo","orcid":"https://orcid.org/0000-0001-6097-0090"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jasha Droppo","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research,Redmond,WA,USA]"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research,Redmond,WA,USA]","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100438655","display_name":"Gang Li","orcid":"https://orcid.org/0000-0001-9755-2781"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Li","raw_affiliation_strings":["Microsoft Research Asia, Beijing, P.R.C","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, P.R.C","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research,Redmond,WA,USA]"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research,Redmond,WA,USA]","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5072932051"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":16.4909,"has_fulltext":false,"cited_by_count":81,"citation_normalized_percentile":{"value":0.9916309,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"235","last_page":"239"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8041183948516846},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.8011401295661926},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6959587335586548},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6350100636482239},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5609355568885803},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5550421476364136},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4994339942932129},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.46520334482192993},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.4451962113380432},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43945837020874023},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4261760711669922},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34491586685180664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2862734794616699},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.20178216695785522},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.07580465078353882}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8041183948516846},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.8011401295661926},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6959587335586548},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6350100636482239},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5609355568885803},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5550421476364136},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4994339942932129},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.46520334482192993},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.4451962113380432},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43945837020874023},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4261760711669922},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34491586685180664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2862734794616699},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.20178216695785522},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.07580465078353882},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2014.6853593","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853593","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W196761320","https://openalex.org/W217970951","https://openalex.org/W1218987319","https://openalex.org/W1498436455","https://openalex.org/W2000200144","https://openalex.org/W2008353316","https://openalex.org/W2011815965","https://openalex.org/W2012257340","https://openalex.org/W2071310251","https://openalex.org/W2087402357","https://openalex.org/W2093794678","https://openalex.org/W2114016253","https://openalex.org/W2120480077","https://openalex.org/W2136922672","https://openalex.org/W2146502635","https://openalex.org/W2147768505","https://openalex.org/W2160306971","https://openalex.org/W2162390675","https://openalex.org/W2164278908","https://openalex.org/W2168231600","https://openalex.org/W2253807446","https://openalex.org/W2261806027","https://openalex.org/W2394932179","https://openalex.org/W2403195671","https://openalex.org/W4292363360","https://openalex.org/W6608133726","https://openalex.org/W6678242812","https://openalex.org/W6682034417","https://openalex.org/W6683722107","https://openalex.org/W6684859321","https://openalex.org/W6803034309","https://openalex.org/W6814477976"],"related_works":["https://openalex.org/W2966297898","https://openalex.org/W2955229517","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2177838837","https://openalex.org/W2963831937","https://openalex.org/W2950520577","https://openalex.org/W3209384898","https://openalex.org/W74409296","https://openalex.org/W1595834484"],"abstract_inverted_index":{"This":[0],"paper":[1],"compares":[2],"the":[3,25,75],"theoretical":[4],"efficiency":[5,69],"of":[6,15,54,81,86,103,113,127,129],"model-parallel":[7],"and":[8,33,83,89,92,134],"data-parallel":[9,67],"distributed":[10],"stochastic":[11],"gradient":[12],"descent":[13],"training":[14,68],"DNNs.":[16],"For":[17],"a":[18,44,51,79],"typical":[19],"Switchboard":[20],"DNN":[21],"with":[22,39,50],"46M":[23],"parameters,":[24],"results":[26],"are":[27],"not":[28,57,110],"pretty:":[29],"With":[30],"modern":[31],"GPUs":[32,42],"interconnects,":[34],"model":[35],"parallelism":[36,49],"is":[37],"optimal":[38],"only":[40],"3":[41],"in":[43],"single":[45],"server,":[46],"while":[47],"data":[48,93],"minibatch":[52,76,90],"size":[53,77],"1024":[55],"does":[56],"even":[58],"scale":[59],"to":[60,115],"2":[61],"GPUs.":[62],"We":[63,95,108],"further":[64],"show":[65],"that":[66,121],"can":[70],"be":[71],"improved":[72],"by":[73],"increasing":[74],"(through":[78],"combination":[80],"AdaGrad":[82],"automatic":[84],"adjustments":[85],"learning":[87],"rate":[88],"size)":[91],"compression.":[94],"arrive":[96],"at":[97],"an":[98],"estimated":[99],"possible":[100],"end-to-end":[101],"speed-up":[102],"5":[104],"times":[105],"or":[106,118],"more.":[107],"do":[109],"address":[111],"issues":[112,120],"robustness":[114],"process":[116],"failure":[117],"other":[119],"might":[122],"occur":[123],"during":[124],"training,":[125],"nor":[126],"speed":[128],"convergence":[130],"differences":[131],"between":[132],"ASGD":[133],"SGD":[135],"parameter":[136],"update":[137],"patterns.":[138]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":6}],"updated_date":"2026-04-05T06:14:27.290980","created_date":"2025-10-10T00:00:00"}
