{"id":"https://openalex.org/W4310385035","doi":"https://doi.org/10.1145/3545008.3545090","title":"Lobster: Load Balance-Aware I/O for Distributed DNN Training","display_name":"Lobster: Load Balance-Aware I/O for Distributed DNN Training","publication_year":2022,"publication_date":"2022-08-29","ids":{"openalex":"https://openalex.org/W4310385035","doi":"https://doi.org/10.1145/3545008.3545090"},"language":"en","primary_location":{"id":"doi:10.1145/3545008.3545090","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545090","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545090","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545090","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100454075","display_name":"Jie Liu","orcid":"https://orcid.org/0000-0002-1782-2081"},"institutions":[{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jie Liu","raw_affiliation_strings":["University of California, Merced, United States of America","UC Merced - University of California [Merced] (5200 N. Lake Road, \r\nMerced, CA 95343 - United States)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Merced, United States of America","institution_ids":["https://openalex.org/I156087764"]},{"raw_affiliation_string":"UC Merced - University of California [Merced] (5200 N. Lake Road, \r\nMerced, CA 95343 - United States)","institution_ids":["https://openalex.org/I156087764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085745891","display_name":"Bogdan Nicolae","orcid":"https://orcid.org/0000-0002-0661-7509"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bogdan Nicolae","raw_affiliation_strings":["Argonne National Laboratory, United States of America","ANL - Argonne National Laboratory [Lemont] (9700 S Cass Ave B109, Lemont, IL, 60439 - United States)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, United States of America","institution_ids":["https://openalex.org/I1282105669"]},{"raw_affiliation_string":"ANL - Argonne National Laboratory [Lemont] (9700 S Cass Ave B109, Lemont, IL, 60439 - United States)","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100407387","display_name":"Dong Li","orcid":"https://orcid.org/0000-0001-9336-0694"},"institutions":[{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Li","raw_affiliation_strings":["University of California, Merced, United States of America","UC Merced - University of California [Merced] (5200 N. Lake Road, \r\nMerced, CA 95343 - United States)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Merced, United States of America","institution_ids":["https://openalex.org/I156087764"]},{"raw_affiliation_string":"UC Merced - University of California [Merced] (5200 N. Lake Road, \r\nMerced, CA 95343 - United States)","institution_ids":["https://openalex.org/I156087764"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100454075"],"corresponding_institution_ids":["https://openalex.org/I156087764"],"apc_list":null,"apc_paid":null,"fwci":0.9136,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.75611224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8242230415344238},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6853704452514648},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.547157347202301},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4957467019557953},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4475197196006775},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.444327175617218},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.4239652156829834},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.32822489738464355},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3260800838470459},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.27671515941619873},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.26214081048965454},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.2503940463066101},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1937248408794403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8242230415344238},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6853704452514648},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.547157347202301},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4957467019557953},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4475197196006775},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.444327175617218},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.4239652156829834},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.32822489738464355},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3260800838470459},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.27671515941619873},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.26214081048965454},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2503940463066101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1937248408794403}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3545008.3545090","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545090","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545090","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03718681v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03718681","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICPP '22: The 51st International Conference on Parallel Processing, Aug 2022, Bordeaux, France. &#x27E8;10.1145/3545008.3545090&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.1145/3545008.3545090","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545090","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545090","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.49000000953674316}],"awards":[{"id":"https://openalex.org/G1751644051","display_name":null,"funder_award_id":"DE-AC02-06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G2777053550","display_name":null,"funder_award_id":"AC02-06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6848031779","display_name":null,"funder_award_id":"06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6918803902","display_name":null,"funder_award_id":"06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G8143874970","display_name":null,"funder_award_id":"AC02-06CH11357","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4310385035.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W2004127822","https://openalex.org/W2032136713","https://openalex.org/W2117539524","https://openalex.org/W2125904114","https://openalex.org/W2155893237","https://openalex.org/W2194775991","https://openalex.org/W2341569833","https://openalex.org/W2900042580","https://openalex.org/W2906007643","https://openalex.org/W2952046647","https://openalex.org/W2963125010","https://openalex.org/W2969388332","https://openalex.org/W2970971581","https://openalex.org/W2987081155","https://openalex.org/W3012479151","https://openalex.org/W3039165326","https://openalex.org/W3048346032","https://openalex.org/W3081844508","https://openalex.org/W3100446627","https://openalex.org/W3104414677","https://openalex.org/W3130134189","https://openalex.org/W3169244019","https://openalex.org/W3200211247","https://openalex.org/W3205803342","https://openalex.org/W3206418153","https://openalex.org/W4230874317","https://openalex.org/W4283070722","https://openalex.org/W4295312788"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W1987128138","https://openalex.org/W2748922771"],"abstract_inverted_index":{"The":[0],"resource-hungry":[1],"and":[2,27,65,79,119,128,130,171,186,206,215,226],"time-consuming":[3],"process":[4,42],"of":[5,29,52,60,133,142,150,159,213],"training":[6,30,53,143,164,228],"Deep":[7],"Neural":[8],"Networks":[9],"(DNNs)":[10],"can":[11],"be":[12],"accelerated":[13],"by":[14,101,230],"optimizing":[15],"and/or":[16],"scaling":[17],"computations":[18],"on":[19,55,110,137],"accelerators":[20],"such":[21],"as":[22,35,163],"GPUs.":[23],"However,":[24],"the":[25,50,108,148,168,219],"loading":[26,41,93,118,170,180],"pre-processing":[28],"samples":[31,144,165],"then":[32],"often":[33],"emerges":[34],"a":[36,44,87,111,157,178,211],"new":[37,88],"bottleneck.":[38],"This":[39],"data":[40,54,62,92,117,120,169,179],"engages":[43],"complex":[45],"pipeline":[46],"that":[47,66,94,182,218],"extends":[48],"from":[49],"sampling":[51],"external":[56],"storage":[57],"to":[58,63,91,116,125,140,189,202,232],"delivery":[59],"those":[61,151],"GPUs,":[64],"comprises":[67],"not":[68,98],"only":[69],"expensive":[70],"I/O":[71,104,204,224],"operations":[72],"but":[73],"also":[74],"decoding,":[75],"shuffling,":[76],"batching,":[77],"augmentation,":[78],"other":[80,102],"operations.":[81],"We":[82,154],"propose":[83],"in":[84,200],"this":[85],"paper":[86],"holistic":[89],"approach":[90,221],"addresses":[95],"three":[96],"challenges":[97],"sufficiently":[99],"addressed":[100],"methods:":[103],"load":[105,207],"imbalances":[106],"among":[107],"GPUs":[109],"node;":[112],"rigid":[113],"resource":[114],"allocations":[115],"preprocessing":[121,172],"steps,":[122],"which":[123],"lead":[124],"idle":[126],"resources":[127],"bottlenecks;":[129],"limited":[131],"efficiency":[132],"caching":[134,199],"strategies":[135],"based":[136],"pre-fetching":[138],"due":[139],"eviction":[141,196],"needed":[145,152],"soon":[146],"at":[147],"expense":[149],"later.":[153],"first":[155],"present":[156],"study":[158],"key":[160],"bottlenecks":[161],"observed":[162],"flow":[166],"through":[167],"pipeline.":[173],"Then,":[174],"we":[175],"describe":[176],"Lobster,":[177],"runtime":[181],"uses":[183],"performance":[184],"modeling":[185],"advanced":[187],"heuristics":[188],"combine":[190],"flexible":[191],"thread":[192],"management":[193],"with":[194,210,236],"optimized":[195],"for":[197],"distributed":[198],"order":[201],"mitigate":[203],"overheads":[205,225],"imbalances.":[208],"Experiments":[209],"range":[212],"models":[214],"datasets":[216],"show":[217],"Lobster":[220],"reduces":[222],"both":[223],"end-to-end":[227],"times":[229],"up":[231],"1.5":[233],"\u00d7":[234],"compared":[235],"state-of-the-art":[237],"approaches.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
