{"id":"https://openalex.org/W4200392450","doi":"https://doi.org/10.1109/mm.2021.3139027","title":"Optimizing Distributed DNN Training Using CPUs and BlueField-2 DPUs","display_name":"Optimizing Distributed DNN Training Using CPUs and BlueField-2 DPUs","publication_year":2021,"publication_date":"2021-12-30","ids":{"openalex":"https://openalex.org/W4200392450","doi":"https://doi.org/10.1109/mm.2021.3139027"},"language":"en","primary_location":{"id":"doi:10.1109/mm.2021.3139027","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2021.3139027","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057769693","display_name":"Arpan Jain","orcid":"https://orcid.org/0000-0003-2522-8522"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arpan Jain","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061967476","display_name":"Nawras Alnaasan","orcid":"https://orcid.org/0000-0002-3638-4144"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nawras Alnaasan","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078128277","display_name":"Aamir Shafi","orcid":"https://orcid.org/0000-0002-1924-2769"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aamir Shafi","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034293705","display_name":"Hari Subramoni","orcid":"https://orcid.org/0000-0002-1200-2754"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hari Subramoni","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5057769693"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.4803,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.66151961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"42","issue":"2","first_page":"53","last_page":"60"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8999381065368652},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5874929428100586},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5318545699119568},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.49645429849624634},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4539267420768738},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4493074417114258},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3793054223060608},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34704896807670593},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.25376707315444946}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8999381065368652},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5874929428100586},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5318545699119568},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.49645429849624634},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4539267420768738},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4493074417114258},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3793054223060608},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34704896807670593},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25376707315444946},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mm.2021.3139027","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2021.3139027","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4300000071525574}],"awards":[{"id":"https://openalex.org/G2744889516","display_name":null,"funder_award_id":"#2007991","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3307200887","display_name":null,"funder_award_id":"#2018627","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3628697998","display_name":null,"funder_award_id":"#1931537","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G387029739","display_name":null,"funder_award_id":"#1818253","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5309149711","display_name":null,"funder_award_id":"#1854828","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2622263826","https://openalex.org/W2984696222","https://openalex.org/W2986124642","https://openalex.org/W3115267280","https://openalex.org/W3132977829","https://openalex.org/W3165940308","https://openalex.org/W3176615255","https://openalex.org/W3204524224","https://openalex.org/W4301239768","https://openalex.org/W6739622702","https://openalex.org/W6748645090"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W1530808388","https://openalex.org/W4366179611","https://openalex.org/W2996078371"],"abstract_inverted_index":{"The":[0],"deep":[1],"learning":[2],"(DL)":[3],"training":[4,108,131],"process":[5],"consists":[6],"of":[7,14,40,48,65,73,85,106,136,148],"multiple":[8,98],"phases\u2014data":[9],"augmentation,":[10],"training,":[11],"and":[12,96],"validation":[13],"the":[15,25,54,62,86,91,104,110,117,134,141,146],"trained":[16],"model.":[17],"Traditionally,":[18],"these":[19],"phases":[20,47,105],"are":[21,120],"executed":[22],"either":[23],"on":[24,90],"central":[26],"processing":[27,31,57],"units":[28,32,58],"or":[29],"graphics":[30],"in":[33,128],"a":[34],"serial":[35],"fashion":[36],"due":[37],"to":[38,44,82,101,109,122,125,144,150],"lack":[39],"additional":[41,87],"computing":[42],"resources":[43],"offload":[45,103],"independent":[46],"DL":[49,107,130,152],"training.":[50,153],"Recently,":[51],"Mellanox/NVIDIA":[52],"introduced":[53],"BlueField-2":[55,92],"data":[56],"(DPUs),":[59],"which":[60],"combine":[61],"advanced":[63],"capabilities":[64],"traditional":[66],"application-specific-integrated-circuit-based":[67],"network":[68],"adapters":[69],"with":[70],"an":[71],"array":[72],"ARM":[74,88],"processors.":[75],"In":[76],"this":[77,139],"article,":[78],"we":[79],"explore":[80,145],"how":[81],"take":[83],"advantage":[84],"cores":[89],"DPUs.":[93,111],"We":[94],"propose":[95],"evaluate":[97],"novel":[99],"designs":[100,119],"efficiently":[102],"Our":[112],"experimental":[113],"results":[114],"show":[115],"that":[116],"proposed":[118],"able":[121],"deliver":[123],"up":[124],"17.5%":[126],"improvement":[127],"overall":[129],"time.":[132],"To":[133],"best":[135],"our":[137],"knowledge,":[138],"is":[140],"first":[142],"work":[143],"use":[147],"DPUs":[149],"accelerate":[151]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
