{"id":"https://openalex.org/W4403721832","doi":"https://doi.org/10.1109/tpds.2024.3485789","title":"Leveraging Graph Analysis to Pinpoint Root Causes of Scalability Issues for Parallel Applications","display_name":"Leveraging Graph Analysis to Pinpoint Root Causes of Scalability Issues for Parallel Applications","publication_year":2024,"publication_date":"2024-10-24","ids":{"openalex":"https://openalex.org/W4403721832","doi":"https://doi.org/10.1109/tpds.2024.3485789"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2024.3485789","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3485789","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063581276","display_name":"Yuyang Jin","orcid":"https://orcid.org/0000-0003-2358-3395"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuyang Jin","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100735476","display_name":"Haojie Wang","orcid":"https://orcid.org/0000-0003-4605-148X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haojie Wang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011572497","display_name":"Xiongchao Tang","orcid":"https://orcid.org/0000-0002-1692-3964"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongchao Tang","raw_affiliation_strings":["Qingcheng.AI Company, Beijing, China","Qingcheng.AI company, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Qingcheng.AI Company, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"Qingcheng.AI company, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055656398","display_name":"Zhenhua Guo","orcid":"https://orcid.org/0000-0002-1303-6681"},"institutions":[{"id":"https://openalex.org/I4210144143","display_name":"Inspur (China)","ror":"https://ror.org/0474p4r72","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210144143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Guo","raw_affiliation_strings":["State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Company Ltd., Jinan, China","State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Co., Ltd, Jinan, Shandong, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Company Ltd., Jinan, China","institution_ids":["https://openalex.org/I4210144143"]},{"raw_affiliation_string":"State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Co., Ltd, Jinan, Shandong, China","institution_ids":["https://openalex.org/I4210144143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101568187","display_name":"Yaqian Zhao","orcid":"https://orcid.org/0000-0002-9170-0090"},"institutions":[{"id":"https://openalex.org/I4210144143","display_name":"Inspur (China)","ror":"https://ror.org/0474p4r72","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210144143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaqian Zhao","raw_affiliation_strings":["State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Company Ltd., Jinan, China","State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Co., Ltd, Jinan, Shandong, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Company Ltd., Jinan, China","institution_ids":["https://openalex.org/I4210144143"]},{"raw_affiliation_string":"State Key Laboratory of High-End and Storage Technology, Inspur Electronic Information Industry Co., Ltd, Jinan, Shandong, China","institution_ids":["https://openalex.org/I4210144143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026990786","display_name":"Torsten Hoefler","orcid":"https://orcid.org/0000-0002-1333-9797"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Torsten Hoefler","raw_affiliation_strings":["Department of Computer Science, Scalable Parallel Computing Laboratory (SPCL), ETH Zurich, Zurich, Switzerland","Scalable Parallel Computing Laboratory (SPCL), the Department of Computer Science, ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Scalable Parallel Computing Laboratory (SPCL), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"Scalable Parallel Computing Laboratory (SPCL), the Department of Computer Science, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100735051","display_name":"Tao Liu","orcid":"https://orcid.org/0000-0002-9653-4108"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Liu","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences), Jinan, Shandong, China","institution_ids":["https://openalex.org/I4210142748","https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100331713","display_name":"Xu Liu","orcid":"https://orcid.org/0000-0002-8984-5485"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xu Liu","raw_affiliation_strings":["Computer Science Department, North Carolina State University, Raleigh, NC, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, North Carolina State University, Raleigh, NC, USA","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071200777","display_name":"Jidong Zhai","orcid":"https://orcid.org/0000-0002-7656-6428"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jidong Zhai","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5063581276"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.245,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54059894,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"2","first_page":"308","last_page":"325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8454610109329224},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6908634901046753},{"id":"https://openalex.org/keywords/root-cause-analysis","display_name":"Root cause analysis","score":0.5904005765914917},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5586808919906616},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4707285463809967},{"id":"https://openalex.org/keywords/root","display_name":"Root (linguistics)","score":0.4432033598423004},{"id":"https://openalex.org/keywords/call-graph","display_name":"Call graph","score":0.41526955366134644},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.36922845244407654},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.35825878381729126},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13323062658309937},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.09113892912864685}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8454610109329224},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6908634901046753},{"id":"https://openalex.org/C130963320","wikidata":"https://www.wikidata.org/wiki/Q1401207","display_name":"Root cause analysis","level":2,"score":0.5904005765914917},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5586808919906616},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4707285463809967},{"id":"https://openalex.org/C171078966","wikidata":"https://www.wikidata.org/wiki/Q111029","display_name":"Root (linguistics)","level":2,"score":0.4432033598423004},{"id":"https://openalex.org/C102379954","wikidata":"https://www.wikidata.org/wiki/Q2589940","display_name":"Call graph","level":2,"score":0.41526955366134644},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.36922845244407654},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35825878381729126},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13323062658309937},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.09113892912864685},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2024.3485789","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2024.3485789","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4593719135","display_name":null,"funder_award_id":"62302251","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5298069657","display_name":null,"funder_award_id":"BX20230193","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G5733278378","display_name":null,"funder_award_id":"U23B2027","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W153244122","https://openalex.org/W960901134","https://openalex.org/W1133087070","https://openalex.org/W1545880915","https://openalex.org/W1559603603","https://openalex.org/W1575775940","https://openalex.org/W1582213161","https://openalex.org/W1595501290","https://openalex.org/W1823178428","https://openalex.org/W1964800194","https://openalex.org/W2009927602","https://openalex.org/W2022421114","https://openalex.org/W2024135699","https://openalex.org/W2024166759","https://openalex.org/W2030553163","https://openalex.org/W2063364927","https://openalex.org/W2064452120","https://openalex.org/W2067746574","https://openalex.org/W2074172828","https://openalex.org/W2101778912","https://openalex.org/W2106913893","https://openalex.org/W2108177987","https://openalex.org/W2109074901","https://openalex.org/W2109098807","https://openalex.org/W2109774496","https://openalex.org/W2114834656","https://openalex.org/W2136434791","https://openalex.org/W2136895653","https://openalex.org/W2144038733","https://openalex.org/W2153773948","https://openalex.org/W2160112364","https://openalex.org/W2161565966","https://openalex.org/W2169631286","https://openalex.org/W2169880319","https://openalex.org/W2293735149","https://openalex.org/W2519457682","https://openalex.org/W2780969726","https://openalex.org/W2783698613","https://openalex.org/W2987414305","https://openalex.org/W3022379043","https://openalex.org/W3034583809","https://openalex.org/W3186473950","https://openalex.org/W3201073812","https://openalex.org/W4205091457","https://openalex.org/W4220788255","https://openalex.org/W4232757129","https://openalex.org/W4245845567","https://openalex.org/W4246569695","https://openalex.org/W4250558257","https://openalex.org/W4252521241","https://openalex.org/W4396814957","https://openalex.org/W6741126538","https://openalex.org/W6753570625"],"related_works":["https://openalex.org/W2030594396","https://openalex.org/W2535098331","https://openalex.org/W3015720271","https://openalex.org/W4255366506","https://openalex.org/W2202104725","https://openalex.org/W2056250485","https://openalex.org/W4280640835","https://openalex.org/W4290642490","https://openalex.org/W4230900947","https://openalex.org/W2111856191"],"abstract_inverted_index":{"It":[0],"is":[1,35,117],"challenging":[2],"to":[3,7,67,83,91,106,135,160,187,202],"scale":[4],"parallel":[5,99,114],"applications":[6,157],"modern":[8],"supercomputers":[9],"because":[10],"of":[11,71,98,141,150,163,176,183],"load":[12],"imbalance,":[13],"resource":[14],"contention,":[15],"and":[16,21,73,95,101,122,148,165],"communications":[17],"between":[18],"processes.":[19,189],"Profiling":[20,34],"tracing":[22,74],"are":[23],"two":[24],"main":[25],"performance":[26,108,115,204],"analysis":[27,65,90],"approaches":[28,105],"for":[29,41,185],"detecting":[30],"these":[31,55],"scalability":[32],"bottlenecks.":[33],"low-cost":[36],"but":[37,49],"lacks":[38],"detailed":[39],"dependence":[40,97],"identifying":[42],"root":[43,139,174,193],"causes.":[44],"Tracing":[45],"records":[46],"plentiful":[47],"information":[48],"incurs":[50],"significant":[51],"overheads.":[52],"To":[53],"address":[54],"issues,":[56],"we":[57,129],"present":[58],"<sc":[59,85,151],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[60,86,152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">ScalAna</small>,":[61],"which":[62],"employs":[63],"static":[64,89,121],"techniques":[66],"combine":[68],"the":[69,138,146,173,192],"benefits":[70],"profiling":[72,104],"-":[75],"it":[76,199],"enables":[77],"tracing's":[78],"analyzability":[79],"with":[80,119,158,179],"overhead":[81,182],"similar":[82],"profiling.":[84],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">ScalAna</small>":[87,153],"uses":[88],"capture":[92],"program":[93],"structures":[94],"data":[96,109],"applications,":[100],"leverages":[102],"lightweight":[103],"record":[107],"during":[110],"runtime.":[111],"Then":[112],"a":[113,131],"graph":[116],"generated":[118],"both":[120],"dynamic":[123],"data.":[124],"Based":[125],"on":[126],"this":[127],"graph,":[128],"design":[130],"backtracking":[132],"detection":[133],"approach":[134,169],"automatically":[136],"pinpoint":[137,172],"causes":[140,175,194],"scaling":[142,177],"issues.":[143],"We":[144],"evaluate":[145],"efficacy":[147],"efficiency":[149],"using":[154],"several":[155],"real":[156],"up":[159,186,201],"704K":[161],"lines":[162],"code":[164],"demonstrate":[166],"that":[167],"our":[168,197],"can":[170],"effectively":[171],"loss":[178],"an":[180],"average":[181],"5.65%":[184],"16,384":[188],"By":[190],"fixing":[191],"detected":[195],"by":[196],"tool,":[198],"achieves":[200],"33.01%":[203],"improvement.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
