{"id":"https://openalex.org/W4406461361","doi":"https://doi.org/10.1109/bigdata62323.2024.10825722","title":"Efficient Data-parallel Distributed DNN Training for Big Dataset under Heterogeneous GPU Cluster","display_name":"Efficient Data-parallel Distributed DNN Training for Big Dataset under Heterogeneous GPU Cluster","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406461361","doi":"https://doi.org/10.1109/bigdata62323.2024.10825722"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825722","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825722","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063953385","display_name":"Shinyoung Ahn","orcid":"https://orcid.org/0000-0002-2686-7273"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Shinyoung Ahn","raw_affiliation_strings":["ETRI,Supercomputing System Research Section,Daejeon,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"ETRI,Supercomputing System Research Section,Daejeon,Republic of Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035611101","display_name":"Sookwang Lee","orcid":"https://orcid.org/0009-0002-3326-278X"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sookwang Lee","raw_affiliation_strings":["ETRI,Supercomputing System Research Section,Daejeon,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"ETRI,Supercomputing System Research Section,Daejeon,Republic of Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000406391","display_name":"Hyeonseong Choi","orcid":"https://orcid.org/0000-0002-7992-2360"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyeonseong Choi","raw_affiliation_strings":["MangoBoost,Software Team,Seoul,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"MangoBoost,Software Team,Seoul,Republic of Korea","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100405195","display_name":"Jaehyun Lee","orcid":"https://orcid.org/0000-0001-7384-7075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaehyun Lee","raw_affiliation_strings":["Puzzle AI Inc.,Research Institute,Daejeon,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Puzzle AI Inc.,Research Institute,Daejeon,Republic of Korea","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063953385"],"corresponding_institution_ids":["https://openalex.org/I142401562"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26680366,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"179","last_page":"188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8582130670547485},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.7039997577667236},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6852934956550598},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.5421662330627441},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5275827646255493},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4458393454551697},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.42920157313346863},{"id":"https://openalex.org/keywords/parallel-processing","display_name":"Parallel processing","score":0.4202391803264618},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.38665422797203064},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.35062548518180847},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.3497786521911621},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3334445059299469},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.32437729835510254},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.17886805534362793},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13727626204490662}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8582130670547485},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.7039997577667236},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6852934956550598},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.5421662330627441},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5275827646255493},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4458393454551697},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.42920157313346863},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.4202391803264618},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.38665422797203064},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35062548518180847},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.3497786521911621},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3334445059299469},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32437729835510254},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.17886805534362793},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13727626204490662},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825722","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825722","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1598866093","https://openalex.org/W1861492603","https://openalex.org/W2083842231","https://openalex.org/W2086161653","https://openalex.org/W2102017903","https://openalex.org/W2108598243","https://openalex.org/W2120432001","https://openalex.org/W2194775991","https://openalex.org/W2274162699","https://openalex.org/W2398934890","https://openalex.org/W2559655401","https://openalex.org/W2622263826","https://openalex.org/W2788193959","https://openalex.org/W2800893679","https://openalex.org/W2807970597","https://openalex.org/W2893813411","https://openalex.org/W2900824371","https://openalex.org/W2901299441","https://openalex.org/W2901541570","https://openalex.org/W2962747323","https://openalex.org/W2963037989","https://openalex.org/W2963964896","https://openalex.org/W3001279689","https://openalex.org/W3103606447","https://openalex.org/W3173623873","https://openalex.org/W3174394143","https://openalex.org/W3204998121","https://openalex.org/W3205803342","https://openalex.org/W4246399668","https://openalex.org/W4285504003","https://openalex.org/W4386076325","https://openalex.org/W6628377381","https://openalex.org/W6628973269","https://openalex.org/W6635810480","https://openalex.org/W6638803421","https://openalex.org/W6680402377","https://openalex.org/W6684859321","https://openalex.org/W6703420464","https://openalex.org/W6739622702","https://openalex.org/W6748425271","https://openalex.org/W6753209298","https://openalex.org/W6756488470","https://openalex.org/W6772383348","https://openalex.org/W6787953186","https://openalex.org/W6849520326"],"related_works":["https://openalex.org/W2056717482","https://openalex.org/W3189307731","https://openalex.org/W1428699136","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W2030707850","https://openalex.org/W1998560227","https://openalex.org/W2170611190","https://openalex.org/W2566934642","https://openalex.org/W2163816448"],"abstract_inverted_index":{"Training":[0],"large-scale":[1],"deep":[2,97],"neural":[3],"networks":[4],"(DNNs)":[5],"using":[6],"a":[7,93,123,132,159],"large":[8],"number":[9],"of":[10,112],"parameters":[11],"requires":[12],"significant":[13],"computational":[14],"resources.":[15,115],"Despite":[16],"the":[17,77,102,109,164,189],"rapid":[18],"advancements":[19],"in":[20,122],"GPU":[21,32,73,114,134],"technology,":[22],"limited":[23],"budgets":[24],"have":[25,64],"forced":[26],"many":[27,62],"institutions":[28],"to":[29,35,66,76,158],"gradually":[30],"build":[31],"servers,":[33],"leading":[34],"growing":[36],"challenges":[37],"with":[38],"resource":[39],"heterogeneity.":[40],"However,":[41],"most":[42],"open-source":[43],"distributed":[44,69,96,144],"deep-learning":[45],"libraries":[46],"use":[47],"synchronous":[48,167],"training":[49,70,138,146,168,171,188],"algorithms":[50],"that":[51,173],"perform":[52],"better":[53],"on":[54,58,71,91,153],"homogeneous":[55],"GPUs":[56],"than":[57,178,185],"heterogeneous":[59,72,113,133],"GPUs.":[60],"Therefore,":[61],"researchers":[63],"struggled":[65],"efficiently":[67],"conduct":[68],"clusters":[74],"owing":[75],"straggler":[78],"problem.":[79],"In":[80,131,162],"this":[81],"study,":[82],"we":[83],"introduce":[84],"Efficient":[85],"Distributed":[86],"Deep":[87],"learning":[88,98],"lIbrary":[89],"based":[90],"SoftMemoryBox(EDDIS),":[92],"novel":[94],"data-parallel":[95],"library.":[99],"EDDIS":[100,116,165],"overcomes":[101],"scalability":[103],"limitations":[104],"caused":[105],"by":[106,147],"heterogeneity,":[107],"enabling":[108],"efficient":[110],"utilization":[111],"trains":[117],"DNNs":[118],"synchronously,":[119],"asynchronously,":[120],"and":[121,126,129,141,151,181],"hybrid":[124,142,166],"manner":[125],"supports":[127],"TensorFlow":[128],"PyTorch.":[130],"environment,":[135],"EDDIS\u2019s":[136],"three":[137],"modes\u2014synchronous,":[139],"asynchronous,":[140],"synchronous\u2014accelerate":[143],"DNN":[145],"approximately":[148],"8.2x,":[149],"19x,":[150],"18.7x":[152],"16":[154],"nodes,":[155],"respectively,":[156],"compared":[157],"single":[160],"node.":[161],"particular,":[163],"mode":[169],"achieves":[170],"speeds":[172],"are":[174],"2.8":[175],"times":[176,183],"faster":[177,184],"PyTorch":[179],"DDP":[180],"2.3":[182],"Horovod":[186],"when":[187],"Yolov5m":[190],"model.":[191]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
