{"id":"https://openalex.org/W4403182242","doi":"https://doi.org/10.1109/tpds.2024.3476390","title":"A Survey on Performance Modeling and Prediction for Distributed DNN Training","display_name":"A Survey on Performance Modeling and Prediction for Distributed DNN Training","publication_year":2024,"publication_date":"2024-10-07","ids":{"openalex":"https://openalex.org/W4403182242","doi":"https://doi.org/10.1109/tpds.2024.3476390"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2024.3476390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3476390","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055656398","display_name":"Zhenhua Guo","orcid":"https://orcid.org/0000-0002-1303-6681"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenhua Guo","raw_affiliation_strings":["IEIT SYSTEMS Company, Ltd., Jinan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Company, Ltd., Jinan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033649899","display_name":"Yinan Tang","orcid":"https://orcid.org/0000-0002-6029-3744"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yinan Tang","raw_affiliation_strings":["IEIT SYSTEMS Company, Ltd., Jinan, China"],"raw_orcid":"https://orcid.org/0000-0002-6029-3744","affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Company, Ltd., Jinan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071200777","display_name":"Jidong Zhai","orcid":"https://orcid.org/0000-0002-7656-6428"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jidong Zhai","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7656-6428","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042100540","display_name":"Tongtong Yuan","orcid":"https://orcid.org/0000-0002-8224-9891"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tongtong Yuan","raw_affiliation_strings":["Beijing University of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8224-9891","affiliations":[{"raw_affiliation_string":"Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100745788","display_name":"Jian Jin","orcid":"https://orcid.org/0000-0002-4128-966X"},"institutions":[{"id":"https://openalex.org/I4210130112","display_name":"China Academy of Information and Communications Technology","ror":"https://ror.org/038dte259","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210130112","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Jin","raw_affiliation_strings":["CAICT, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4128-966X","affiliations":[{"raw_affiliation_string":"CAICT, Beijing, China","institution_ids":["https://openalex.org/I4210130112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382429","display_name":"Wang Li","orcid":"https://orcid.org/0000-0003-1286-4407"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li Wang","raw_affiliation_strings":["IEIT SYSTEMS Company, Ltd., Jinan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Company, Ltd., Jinan, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101017833","display_name":"Yaqian Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaqian Zhao","raw_affiliation_strings":["IEIT SYSTEMS Company, Ltd., Jinan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Company, Ltd., Jinan, China","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Rengang Li","orcid":"https://orcid.org/0009-0008-4460-9869"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rengang Li","raw_affiliation_strings":["IEIT SYSTEMS Company, Ltd., Jinan, China"],"raw_orcid":"https://orcid.org/0009-0008-4460-9869","affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Company, Ltd., Jinan, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0936,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79060703,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"35","issue":"12","first_page":"2463","last_page":"2478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8910999894142151,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8910999894142151,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11932","display_name":"Wireless Body Area Networks","score":0.7656999826431274,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.7523000240325928,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8254954814910889},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5762499570846558},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4523027539253235},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41799330711364746},{"id":"https://openalex.org/keywords/performance-prediction","display_name":"Performance prediction","score":0.4133853316307068},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3801257312297821},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3713541626930237},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3402734398841858},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.24005895853042603},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.13299235701560974}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8254954814910889},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5762499570846558},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4523027539253235},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41799330711364746},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.4133853316307068},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3801257312297821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3713541626930237},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3402734398841858},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.24005895853042603},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.13299235701560974},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2024.3476390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3476390","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":156,"referenced_works":["https://openalex.org/W1625390266","https://openalex.org/W1686810756","https://openalex.org/W1902237438","https://openalex.org/W1975934607","https://openalex.org/W1979527452","https://openalex.org/W2018162709","https://openalex.org/W2057332538","https://openalex.org/W2059300917","https://openalex.org/W2063123613","https://openalex.org/W2104166130","https://openalex.org/W2108207895","https://openalex.org/W2112684994","https://openalex.org/W2119541875","https://openalex.org/W2119738171","https://openalex.org/W2139634083","https://openalex.org/W2141024963","https://openalex.org/W2141561793","https://openalex.org/W2157225945","https://openalex.org/W2186615578","https://openalex.org/W2194775991","https://openalex.org/W2271840356","https://openalex.org/W2338908902","https://openalex.org/W2525778437","https://openalex.org/W2585029434","https://openalex.org/W2606722458","https://openalex.org/W2763421725","https://openalex.org/W2798515322","https://openalex.org/W2800652671","https://openalex.org/W2883283076","https://openalex.org/W2883830791","https://openalex.org/W2896457183","https://openalex.org/W2899713699","https://openalex.org/W2903557836","https://openalex.org/W2911491685","https://openalex.org/W2946408574","https://openalex.org/W2947737663","https://openalex.org/W2955454939","https://openalex.org/W2963149401","https://openalex.org/W2964324519","https://openalex.org/W2973727699","https://openalex.org/W2975712713","https://openalex.org/W2981758446","https://openalex.org/W2987203808","https://openalex.org/W2991040477","https://openalex.org/W3000161077","https://openalex.org/W3004495293","https://openalex.org/W3006586535","https://openalex.org/W3007772124","https://openalex.org/W3008591352","https://openalex.org/W3012125688","https://openalex.org/W3012220622","https://openalex.org/W3019389960","https://openalex.org/W3058843122","https://openalex.org/W3081168214","https://openalex.org/W3096425133","https://openalex.org/W3101104221","https://openalex.org/W3105739685","https://openalex.org/W3121689374","https://openalex.org/W3129831491","https://openalex.org/W3132107458","https://openalex.org/W3132858873","https://openalex.org/W3134069806","https://openalex.org/W3135013702","https://openalex.org/W3138303811","https://openalex.org/W3152893301","https://openalex.org/W3157919170","https://openalex.org/W3174661481","https://openalex.org/W3181228931","https://openalex.org/W3187367917","https://openalex.org/W3188065709","https://openalex.org/W3196884386","https://openalex.org/W3197525459","https://openalex.org/W3205803342","https://openalex.org/W3206832494","https://openalex.org/W4213132668","https://openalex.org/W4214866087","https://openalex.org/W4220967350","https://openalex.org/W4229005866","https://openalex.org/W4235645210","https://openalex.org/W4288089799","https://openalex.org/W4290055971","https://openalex.org/W4290991029","https://openalex.org/W4297097368","https://openalex.org/W4298168912","https://openalex.org/W4301239768","https://openalex.org/W4303443398","https://openalex.org/W4310385035","https://openalex.org/W4310557597","https://openalex.org/W4312131336","https://openalex.org/W4312411608","https://openalex.org/W4319781766","https://openalex.org/W4321434591","https://openalex.org/W4322718191","https://openalex.org/W4323060992","https://openalex.org/W4362515116","https://openalex.org/W4365211601","https://openalex.org/W4381886086","https://openalex.org/W4381894573","https://openalex.org/W4382119135","https://openalex.org/W4385245566","https://openalex.org/W4385585365","https://openalex.org/W4386709668","https://openalex.org/W4387914704","https://openalex.org/W4388031315","https://openalex.org/W4390874575","https://openalex.org/W4401567198","https://openalex.org/W6637373629","https://openalex.org/W6638444622","https://openalex.org/W6679154944","https://openalex.org/W6679393576","https://openalex.org/W6680402377","https://openalex.org/W6680804895","https://openalex.org/W6686509673","https://openalex.org/W6694517276","https://openalex.org/W6703652217","https://openalex.org/W6727690538","https://openalex.org/W6744307745","https://openalex.org/W6745245109","https://openalex.org/W6745609711","https://openalex.org/W6748645090","https://openalex.org/W6749290080","https://openalex.org/W6751349269","https://openalex.org/W6751627690","https://openalex.org/W6753584990","https://openalex.org/W6755207826","https://openalex.org/W6756379755","https://openalex.org/W6758823024","https://openalex.org/W6762211661","https://openalex.org/W6763737044","https://openalex.org/W6766978945","https://openalex.org/W6767997687","https://openalex.org/W6768723914","https://openalex.org/W6769627184","https://openalex.org/W6774508117","https://openalex.org/W6774806506","https://openalex.org/W6775649616","https://openalex.org/W6778883912","https://openalex.org/W6779076691","https://openalex.org/W6781915296","https://openalex.org/W6784425352","https://openalex.org/W6787953186","https://openalex.org/W6791545107","https://openalex.org/W6799023841","https://openalex.org/W6803835703","https://openalex.org/W6810097121","https://openalex.org/W6811340617","https://openalex.org/W6811726652","https://openalex.org/W6811928498","https://openalex.org/W6838632916","https://openalex.org/W6840716182","https://openalex.org/W6846002521","https://openalex.org/W6847106655","https://openalex.org/W6849539251","https://openalex.org/W6850625674","https://openalex.org/W6851022115","https://openalex.org/W6851775633"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4366179611","https://openalex.org/W2162051485"],"abstract_inverted_index":{"The":[0,96],"recent":[1],"breakthroughs":[2],"in":[3,49,107,140],"large-scale":[4,28,117],"DNN":[5,16,30,43,57,77,119,138],"attract":[6],"significant":[7],"attention":[8],"from":[9],"both":[10],"academia":[11],"and":[12,23,37,65,71,80,86,90,109,113,121,134],"industry":[13],"toward":[14],"distributed":[15,29,42,56,76,118,137],"training":[17,44,58,139],"techniques.":[18],"Due":[19],"to":[20,35,51,104,123,126],"the":[21,39,53,67,73,82,141],"time-consuming":[22],"expensive":[24],"execution":[25],"process":[26],"of":[27,41,55,69,75,98],"training,":[31,78,120],"it":[32],"is":[33],"crucial":[34],"model":[36],"predict":[38],"performance":[40,74,132],"before":[45],"its":[46],"actual":[47],"deployment,":[48],"order":[50],"optimize":[52],"design":[54],"at":[59],"low":[60],"cost.":[61],"This":[62],"paper":[63,100],"analyzes":[64],"emphasizes":[66],"importance":[68],"modeling":[70,112,133],"predicting":[72],"categorizes":[79],"analyses":[81],"related":[83],"state-of-the-art":[84],"works,":[85],"discusses":[87],"future":[88],"challenges":[89],"opportunities":[91],"for":[92,116,136],"this":[93,99],"research":[94,130],"field.":[95],"objectives":[97],"are":[101],"twofold:":[102],"first,":[103],"assist":[105],"researchers":[106,125],"understanding":[108],"choosing":[110],"suitable":[111],"prediction":[114,135],"tools":[115],"second,":[122],"encourage":[124],"propose":[127],"more":[128],"valuable":[129],"about":[131],"future.":[142]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
