{"id":"https://openalex.org/W4207055127","doi":"https://doi.org/10.1109/tpds.2022.3144453","title":"DLS: A Fast and Flexible Neural Network Training System With Fine-grained Heterogeneous Device Orchestration","display_name":"DLS: A Fast and Flexible Neural Network Training System With Fine-grained Heterogeneous Device Orchestration","publication_year":2022,"publication_date":"2022-01-21","ids":{"openalex":"https://openalex.org/W4207055127","doi":"https://doi.org/10.1109/tpds.2022.3144453"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2022.3144453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2022.3144453","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088973675","display_name":"Pyeongsu Park","orcid":"https://orcid.org/0000-0002-4032-3790"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Pyeongsu Park","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-4032-3790","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329207","display_name":"Jaewon Lee","orcid":"https://orcid.org/0000-0003-3222-5020"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaewon Lee","raw_affiliation_strings":["Facebook, Menlo Park, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Facebook, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041885449","display_name":"Heetaek Jeong","orcid":"https://orcid.org/0000-0002-7346-5170"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Heetaek Jeong","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101743105","display_name":"Jangwoo Kim","orcid":"https://orcid.org/0000-0003-2193-5748"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jangwoo Kim","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-2193-5748","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1015,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.3433029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"33","issue":"11","first_page":"3194","last_page":"3206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8215266466140747},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5817150473594666},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5253369808197021},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.48636844754219055},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.45723897218704224},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.4519781470298767},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.42401134967803955},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38735443353652954},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30338844656944275}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8215266466140747},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5817150473594666},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5253369808197021},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.48636844754219055},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.45723897218704224},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.4519781470298767},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.42401134967803955},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38735443353652954},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30338844656944275},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2022.3144453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2022.3144453","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G365287222","display_name":null,"funder_award_id":"SRFC-IT1901-12","funder_id":"https://openalex.org/F4320332195","funder_display_name":"Samsung"}],"funders":[{"id":"https://openalex.org/F4320321292","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542"},{"id":"https://openalex.org/F4320332195","display_name":"Samsung","ror":"https://ror.org/04w3jy968"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2048266589","https://openalex.org/W2146502635","https://openalex.org/W2309627465","https://openalex.org/W2405578611","https://openalex.org/W2417576069","https://openalex.org/W2515287984","https://openalex.org/W2527036487","https://openalex.org/W2559156603","https://openalex.org/W2588610957","https://openalex.org/W2606722458","https://openalex.org/W2611650229","https://openalex.org/W2617411258","https://openalex.org/W2741959974","https://openalex.org/W2767434619","https://openalex.org/W2774000609","https://openalex.org/W2786414509","https://openalex.org/W2799162093","https://openalex.org/W2803808038","https://openalex.org/W2883929540","https://openalex.org/W2893114493","https://openalex.org/W2904125685","https://openalex.org/W2905104204","https://openalex.org/W2928010249","https://openalex.org/W2931118404","https://openalex.org/W2953384591","https://openalex.org/W2962710043","https://openalex.org/W2963066927","https://openalex.org/W2970157301","https://openalex.org/W2979719709","https://openalex.org/W3011280959","https://openalex.org/W3022548332","https://openalex.org/W3101156210","https://openalex.org/W4249932213","https://openalex.org/W4297685247","https://openalex.org/W6681435938","https://openalex.org/W6713134421","https://openalex.org/W6733590821","https://openalex.org/W6738144653","https://openalex.org/W6742012201","https://openalex.org/W6745995898","https://openalex.org/W6746839373","https://openalex.org/W6748019269","https://openalex.org/W6748687944","https://openalex.org/W6751546485","https://openalex.org/W6767278793"],"related_works":["https://openalex.org/W2160425906","https://openalex.org/W1544908136","https://openalex.org/W1882733036","https://openalex.org/W2002703587","https://openalex.org/W2546696010","https://openalex.org/W3217667592","https://openalex.org/W2389719923","https://openalex.org/W2109998134","https://openalex.org/W1572108542","https://openalex.org/W2140625810"],"abstract_inverted_index":{"Neural":[0],"network":[1,118],"accelerators":[2,40],"(e.g.,":[3,50,146],"TPUs)":[4],"have":[5],"become":[6],"mainstream":[7],"devices":[8,145,160,202],"in":[9,75,93,216],"computing":[10,123],"systems.":[11],"Unfortunately,":[12],"the":[13,24,29,36,39,44,197],"existing":[14],"accelerator-based":[15],"systems":[16],"for":[17,137,149,238],"neural":[18,48,94,117,222],"networks":[19,49,95,223],"fail":[20],"to":[21,28,83,129,162,167,244],"fully":[22],"leverage":[23],"acceleration":[25,110,119],"opportunities":[26],"due":[27,166],"<italic":[30,62],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[31,63],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">limited":[32],"flexibility</i>":[33],".":[34],"Specifically,":[35],"majority":[37],"of":[38,47,61,77,158],"focus":[41],"on":[42,133,143],"only":[43],"compute-intensive":[45,131],"operations":[46,107,132,142,227],"convolution":[51],"and":[52,69,81,108,140,187,209,213,235],"fully-connected":[53],"layers).":[54],"However,":[55],"we":[56,112,176],"identify":[57],"that":[58,90,154,229],"sub-optimal":[59],"handling":[60],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">auxiliary":[64],"operations</i>":[65],"such":[66],"as":[67],"embedding":[68],"compression":[70],"can":[71],"incur":[72],"non-trivial":[73],"loss":[74],"terms":[76],"accuracy,":[78],"training":[79,243,246],"speed,":[80],"adaptability":[82],"new":[84],"domains.":[85],"The":[86,125,218],"problem":[87],"persists":[88],"considering":[89],"recent":[91],"advancements":[92],"often":[96],"come":[97],"from":[98,241],"auxiliary":[99,106,141,226],"operations.":[100],"To":[101,171],"effectively":[102],"handle":[103],"rapidly":[104],"evolving":[105],"maximize":[109],"opportunities,":[111],"propose":[113,177],"DLS,":[114],"a":[115,155],"holistic":[116],"system":[120,195,231],"using":[121,220],"heterogeneous":[122,201],"devices.":[124],"key":[126],"idea":[127],"is":[128],"distribute":[130],"highly":[134],"specialized":[135],"ASICs":[136],"maximum":[138],"performance,":[139],"flexible":[144],"FPGA,":[147],"GPU)":[148],"better":[150],"adaptability.":[151],"We":[152],"emphasize":[153],"na\u00efve":[156],"integration":[157],"different":[159],"fails":[161],"deliver":[163],"high":[164,168,236],"performance":[165,237],"communication":[169,174,186,198,214],"overheads.":[170],"address":[172],"this":[173,192],"inefficiency,":[175],"an":[178],"efficient":[179],"FPGA-based":[180],"device":[181],"orchestration":[182],"utilizing":[183],"direct":[184],"device-to-device":[185],"fine-grained":[188],"operation":[189],"scheduling.":[190],"In":[191],"way,":[193],"our":[194,230],"alleviates":[196],"overhead":[199],"between":[200],"by":[203],"removing":[204],"expensive":[205],"kernel":[206],"stack":[207],"traversal":[208],"leveraging":[210],"computation":[211],"units":[212],"links":[215],"parallel.":[217],"evaluation":[219],"popular":[221],"with":[224],"emerging":[225],"shows":[228],"achieves":[232],"both":[233],"flexibility":[234],"various":[239],"cases":[240],"single-accelerator":[242],"distributed":[245],"(2.6\u20138.9\u00d7":[247],"speedup).":[248]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
