{"id":"https://openalex.org/W3081168214","doi":"https://doi.org/10.1145/3394486.3406703","title":"DeepSpeed","display_name":"DeepSpeed","publication_year":2020,"publication_date":"2020-08-20","ids":{"openalex":"https://openalex.org/W3081168214","doi":"https://doi.org/10.1145/3394486.3406703","mag":"3081168214"},"language":"en","primary_location":{"id":"doi:10.1145/3394486.3406703","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394486.3406703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037534081","display_name":"Jeff Rasley","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeff Rasley","raw_affiliation_strings":["Microsoft, Bellevue, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069595628","display_name":"Samyam Rajbhandari","orcid":"https://orcid.org/0000-0002-0386-8759"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samyam Rajbhandari","raw_affiliation_strings":["Microsoft, Bellevue, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022644245","display_name":"Olatunji Ruwase","orcid":"https://orcid.org/0000-0002-5508-0728"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Olatunji Ruwase","raw_affiliation_strings":["Microsoft, Bellevue, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040302174","display_name":"Yuxiong He","orcid":"https://orcid.org/0000-0003-0478-8854"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuxiong He","raw_affiliation_strings":["Microsoft, Bellevue, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":28.0275,"has_fulltext":false,"cited_by_count":709,"citation_normalized_percentile":{"value":0.99715734,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3505","last_page":"3506"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.8446000218391418,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8428000211715698,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8601477742195129},{"id":"https://openalex.org/keywords/turing","display_name":"Turing","score":0.5892095565795898},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.5748022198677063},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.4367055594921112},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.43194639682769775},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4303329586982727},{"id":"https://openalex.org/keywords/turing-machine","display_name":"Turing machine","score":0.41893208026885986},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.34914863109588623},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34903332591056824},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2145896553993225},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1939278244972229},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.15534105896949768}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8601477742195129},{"id":"https://openalex.org/C9870796","wikidata":"https://www.wikidata.org/wiki/Q490481","display_name":"Turing","level":2,"score":0.5892095565795898},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.5748022198677063},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.4367055594921112},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.43194639682769775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4303329586982727},{"id":"https://openalex.org/C29248071","wikidata":"https://www.wikidata.org/wiki/Q163310","display_name":"Turing machine","level":3,"score":0.41893208026885986},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.34914863109588623},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34903332591056824},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2145896553993225},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1939278244972229},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.15534105896949768}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3394486.3406703","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394486.3406703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W2763421725","https://openalex.org/W6745245109"],"related_works":["https://openalex.org/W2119244842","https://openalex.org/W4285436350","https://openalex.org/W4324016496","https://openalex.org/W2215138167","https://openalex.org/W4254404873","https://openalex.org/W4231495498","https://openalex.org/W2090113742","https://openalex.org/W4312201671","https://openalex.org/W2306527987","https://openalex.org/W3160192459"],"abstract_inverted_index":{"Explore":[0],"new":[1,43],"techniques":[2],"in":[3],"Microsoft's":[4],"open":[5],"source":[6],"library":[7],"called":[8,39],"DeepSpeed,":[9],"which":[10,80],"advances":[11],"large":[12],"model":[13,53,93],"training":[14],"by":[15],"improving":[16],"scale,":[17],"speed,":[18],"cost,":[19],"and":[20,54],"usability,":[21],"unlocking":[22],"the":[23,49,60,82,88,112,117],"ability":[24],"to":[25,73,115],"train":[26],"100-billion-parameter":[27],"models.":[28],"DeepSpeed":[29,113],"is":[30,41],"compatible":[31],"with":[32],"PyTorch.":[33],"One":[34],"piece":[35],"of":[36,62,84],"our":[37,105],"library,":[38],"ZeRO,":[40],"a":[42],"parallelized":[44],"optimizer":[45],"that":[46,64,110],"greatly":[47],"reduces":[48],"resources":[50],"needed":[51],"for":[52],"data":[55],"parallelism":[56],"while":[57],"massively":[58],"increasing":[59],"number":[61],"parameters":[63],"can":[65],"be":[66],"trained.":[67],"Researchers":[68],"have":[69],"used":[70],"these":[71],"breakthroughs":[72],"create":[74],"Turing":[75],"Natural":[76],"Language":[77],"Generation":[78],"(Turing-NLG),":[79],"at":[81,94],"time":[83],"its":[85],"release":[86],"was":[87],"largest":[89],"publicly":[90],"known":[91],"language":[92],"17":[95],"billion":[96],"parameters.":[97],"In":[98],"addition":[99],"we":[100],"will":[101],"also":[102],"go":[103],"over":[104],"latest":[106],"transformer":[107],"kernel":[108],"advancements":[109],"led":[111],"team":[114],"achieve":[116],"world":[118],"fastest":[119],"BERT":[120],"pretraining":[121],"record.":[122]},"counts_by_year":[{"year":2026,"cited_by_count":61},{"year":2025,"cited_by_count":259},{"year":2024,"cited_by_count":182},{"year":2023,"cited_by_count":111},{"year":2022,"cited_by_count":46},{"year":2021,"cited_by_count":43},{"year":2020,"cited_by_count":7}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2020-09-01T00:00:00"}
