{"id":"https://openalex.org/W2405883473","doi":"https://doi.org/10.1109/icassp.2016.7472805","title":"Scalable training of deep learning machines by incremental block training with intra-block parallel optimization and blockwise model-update filtering","display_name":"Scalable training of deep learning machines by incremental block training with intra-block parallel optimization and blockwise model-update filtering","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2405883473","doi":"https://doi.org/10.1109/icassp.2016.7472805","mag":"2405883473"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100437924","display_name":"Kai Chen","orcid":"https://orcid.org/0000-0001-6384-0355"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kai Chen","raw_affiliation_strings":["Microsoft Research, Beijing, China","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039662070","display_name":"Qiang Huo","orcid":"https://orcid.org/0000-0003-2464-6482"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Huo","raw_affiliation_strings":["Microsoft Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100437924"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":33.8497,"has_fulltext":false,"cited_by_count":162,"citation_normalized_percentile":{"value":0.99689126,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5880","last_page":"5884"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9003053903579712},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6664189696311951},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6573506593704224},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.616041362285614},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5946804285049438},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5630558133125305},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5455545783042908},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5164023041725159},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5117985606193542},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5090360045433044},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4856451749801636},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4257374405860901},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.40225544571876526},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35773712396621704},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.345022976398468}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9003053903579712},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6664189696311951},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6573506593704224},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.616041362285614},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5946804285049438},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5630558133125305},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5455545783042908},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5164023041725159},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5117985606193542},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5090360045433044},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4856451749801636},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4257374405860901},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.40225544571876526},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35773712396621704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.345022976398468},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7472805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W97072897","https://openalex.org/W104184427","https://openalex.org/W130515627","https://openalex.org/W1489125746","https://openalex.org/W1498436455","https://openalex.org/W1677182931","https://openalex.org/W1779452081","https://openalex.org/W1845277745","https://openalex.org/W1978660892","https://openalex.org/W1988720110","https://openalex.org/W2004220942","https://openalex.org/W2005708641","https://openalex.org/W2026369565","https://openalex.org/W2035846950","https://openalex.org/W2087402357","https://openalex.org/W2102113734","https://openalex.org/W2138243089","https://openalex.org/W2138302120","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2152563462","https://openalex.org/W2160815625","https://openalex.org/W2162390675","https://openalex.org/W2163605009","https://openalex.org/W2164278908","https://openalex.org/W2166637769","https://openalex.org/W2166706236","https://openalex.org/W2168231600","https://openalex.org/W2171312815","https://openalex.org/W2293009711","https://openalex.org/W2293634267","https://openalex.org/W2294723541","https://openalex.org/W2329068866","https://openalex.org/W2394932179","https://openalex.org/W2397424575","https://openalex.org/W2399364384","https://openalex.org/W2405578611","https://openalex.org/W2407022425","https://openalex.org/W2596013733","https://openalex.org/W2618530766","https://openalex.org/W2951781666","https://openalex.org/W2963804082","https://openalex.org/W2963920996","https://openalex.org/W4256161595","https://openalex.org/W4285719527","https://openalex.org/W4292363360","https://openalex.org/W4297797495","https://openalex.org/W6604254268","https://openalex.org/W6605254082","https://openalex.org/W6629052376","https://openalex.org/W6638005537","https://openalex.org/W6638803421","https://openalex.org/W6675365184","https://openalex.org/W6680748266","https://openalex.org/W6683722107","https://openalex.org/W6684191040","https://openalex.org/W6684249991","https://openalex.org/W6684859321","https://openalex.org/W6696934422","https://openalex.org/W6696982659","https://openalex.org/W6697185088","https://openalex.org/W6712773019","https://openalex.org/W6713835734","https://openalex.org/W6714239094","https://openalex.org/W6764309476"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2027972911","https://openalex.org/W2146343568","https://openalex.org/W2499279132","https://openalex.org/W2966297898","https://openalex.org/W1974690493","https://openalex.org/W2955229517"],"abstract_inverted_index":{"We":[0,95],"present":[1],"a":[2,37,129,138],"new":[3],"approach":[4],"to":[5,20,28,48,101],"scalable":[6],"training":[7,15,136],"of":[8,127],"deep":[9,60,73],"learning":[10,30,46],"machines":[11],"by":[12],"incremental":[13],"block":[14],"with":[16,41,115,125],"intra-block":[17],"parallel":[18,50],"optimization":[19],"leverage":[21],"data":[22],"parallelism":[23],"and":[24,53,70,91,108],"blockwise":[25],"model-update":[26],"filtering":[27],"stabilize":[29],"process.":[31],"By":[32],"using":[33],"an":[34,42],"implementation":[35],"on":[36,83,105,112,137],"distributed":[38],"GPU":[39,103,110],"cluster":[40],"MPI-based":[43],"HPC":[44],"machine":[45],"framework":[47],"coordinate":[49],"job":[51],"scheduling":[52],"collective":[54],"communication,":[55],"we":[56],"have":[57],"trained":[58],"successfully":[59],"bidirectional":[61],"long":[62],"short-term":[63],"memory":[64],"(LSTM)":[65],"recurrent":[66],"neural":[67,74],"networks":[68,75],"(RNNs)":[69],"fully-connected":[71],"feed-forward":[72],"(DNNs)":[76],"for":[77],"large":[78],"vocabulary":[79],"continuous":[80],"speech":[81],"recognition":[82,121],"two":[84],"benchmark":[85],"tasks,":[86],"namely":[87],"309-hour":[88],"Switchboard-I":[89],"task":[90,107],"1,860-hour":[92],"\"Switch-board+Fisher\"":[93],"task.":[94],"achieve":[96],"almost":[97],"linear":[98],"speedup":[99],"up":[100],"16":[102],"cards":[104,111],"LSTM":[106],"64":[109],"DNN":[113],"task,":[114],"either":[116],"no":[117],"degradation":[118],"or":[119],"improved":[120],"accuracy":[122],"in":[123],"comparison":[124],"that":[126],"running":[128],"traditional":[130],"mini-batch":[131],"based":[132],"stochastic":[133],"gradient":[134],"descent":[135],"single":[139],"GPU.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":26},{"year":2020,"cited_by_count":38},{"year":2019,"cited_by_count":37},{"year":2018,"cited_by_count":21},{"year":2017,"cited_by_count":14},{"year":2016,"cited_by_count":7}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
