{"id":"https://openalex.org/W2767472765","doi":"https://doi.org/10.1145/3126908.3126938","title":"Parastack","display_name":"Parastack","publication_year":2017,"publication_date":"2017-11-08","ids":{"openalex":"https://openalex.org/W2767472765","doi":"https://doi.org/10.1145/3126908.3126938","mag":"2767472765"},"language":"en","primary_location":{"id":"doi:10.1145/3126908.3126938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126908.3126938","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100402160","display_name":"Hongbo Li","orcid":"https://orcid.org/0000-0001-8173-234X"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongbo Li","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061737717","display_name":"Zizhong Chen","orcid":"https://orcid.org/0000-0003-2578-4940"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zizhong Chen","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100699251","display_name":"Rajiv Gupta","orcid":"https://orcid.org/0000-0002-9348-3974"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajiv Gupta","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100402160"],"corresponding_institution_ids":["https://openalex.org/I2803209242"],"apc_list":null,"apc_paid":null,"fwci":0.6759,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.70091678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timeout","display_name":"Timeout","score":0.9828503131866455},{"id":"https://openalex.org/keywords/hang","display_name":"Hang","score":0.8706204891204834},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8359115123748779},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7912298440933228},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6913948059082031},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6378269195556641},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.496390163898468},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.47877633571624756},{"id":"https://openalex.org/keywords/alarm","display_name":"ALARM","score":0.43113577365875244},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41958802938461304},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4052715301513672},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3311127722263336},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.25319212675094604},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1756369173526764},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07296991348266602}],"concepts":[{"id":"https://openalex.org/C31691690","wikidata":"https://www.wikidata.org/wiki/Q1753979","display_name":"Timeout","level":2,"score":0.9828503131866455},{"id":"https://openalex.org/C2781323245","wikidata":"https://www.wikidata.org/wiki/Q1363761","display_name":"Hang","level":2,"score":0.8706204891204834},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8359115123748779},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7912298440933228},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6913948059082031},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6378269195556641},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.496390163898468},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.47877633571624756},{"id":"https://openalex.org/C2779119184","wikidata":"https://www.wikidata.org/wiki/Q294350","display_name":"ALARM","level":2,"score":0.43113577365875244},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41958802938461304},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4052715301513672},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3311127722263336},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25319212675094604},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1756369173526764},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07296991348266602},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3126908.3126938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126908.3126938","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5213981802","display_name":null,"funder_award_id":"CCF-1524852, CCF-1318103, OAC-1305624, CCF-1513201","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W307918303","https://openalex.org/W1252105715","https://openalex.org/W1964800194","https://openalex.org/W1976859632","https://openalex.org/W1978562726","https://openalex.org/W1986905947","https://openalex.org/W1992135271","https://openalex.org/W1994941228","https://openalex.org/W2000882268","https://openalex.org/W2009927602","https://openalex.org/W2013948980","https://openalex.org/W2024166759","https://openalex.org/W2056400780","https://openalex.org/W2061156357","https://openalex.org/W2068975988","https://openalex.org/W2073949144","https://openalex.org/W2097004344","https://openalex.org/W2106913893","https://openalex.org/W2110695317","https://openalex.org/W2115408363","https://openalex.org/W2122295525","https://openalex.org/W2123728588","https://openalex.org/W2132148774","https://openalex.org/W2143303199","https://openalex.org/W2149897682","https://openalex.org/W2157593310","https://openalex.org/W2160097679","https://openalex.org/W2161957670","https://openalex.org/W2162989857","https://openalex.org/W2303756261","https://openalex.org/W4244169215","https://openalex.org/W6698255013"],"related_works":["https://openalex.org/W2001619020","https://openalex.org/W2318163330","https://openalex.org/W4362636126","https://openalex.org/W2117749464","https://openalex.org/W2113451084","https://openalex.org/W2094374755","https://openalex.org/W2008788431","https://openalex.org/W2045868190","https://openalex.org/W1982218871","https://openalex.org/W2048785841"],"abstract_inverted_index":{"While":[0],"program":[1],"hangs":[2,68,181],"on":[3,160],"large":[4,39],"parallel":[5,151],"systems":[6],"can":[7],"be":[8],"detected":[9,93],"via":[10],"the":[11,20,24,52,84,105,108,114,118,129,147,167,220],"widely":[12],"used":[13],"timeout":[14,25,30,41,89],"mechanism,":[15],"it":[16,95],"is":[17,107,196],"difficult":[18],"for":[19,98,223],"users":[21,103],"to":[22,32,66,86,144],"set":[23],"-":[26],"too":[27,38],"small":[28],"a":[29,40,43,70,88,92,122,183],"leads":[31],"high":[33,74],"false":[34,194],"alarm":[35,195],"rates":[36],"and":[37,81,135,149,154,158,162,171,209],"wastes":[42],"vast":[44],"amount":[45],"of":[46,110,137,206,214],"valuable":[47],"computing":[48],"resources.":[49],"To":[50],"address":[51],"above":[53],"problems":[54],"with":[55,73,78,146,189],"hang":[56,106],"detection,":[57],"this":[58],"paper":[59],"presents":[60],"ParaStack,":[61],"an":[62,111],"extremely":[63],"lightweight":[64],"tool":[65,127],"detect":[67],"in":[69,113,182,198],"timely":[71,184],"manner":[72,185],"accuracy,":[75],"negligible":[76,187],"overhead":[77,188],"great":[79],"scalability,":[80],"without":[82],"requiring":[83],"user":[85],"select":[87],"value.":[90],"For":[91,121],"hang,":[94,125],"provides":[96],"direction":[97],"further":[99],"analysis":[100],"by":[101,132],"telling":[102],"whether":[104],"result":[109],"error":[112],"computation":[115],"phase":[116],"or":[117],"communication":[119],"phase.":[120],"computation-error":[123,224],"induced":[124,225],"our":[126],"pinpoints":[128],"faulty":[130,221],"process":[131,222],"excluding":[133],"hundreds":[134],"thousands":[136],"other":[138],"processes.":[139,216],"We":[140],"have":[141],"adapted":[142],"ParaStack":[143,179,217],"work":[145],"Torque":[148],"Slurm":[150],"batch":[152],"schedulers":[153],"validated":[155],"its":[156],"functionality":[157],"performance":[159],"Tianhe-2":[161],"Stampede":[163],"that":[164,178],"are":[165],"respectively":[166],"world's":[168],"current":[169],"2nd":[170],"12th":[172],"fastest":[173],"supercomputers.":[174],"Experimental":[175],"results":[176],"demonstrate":[177],"detects":[180],"at":[186,204,212],"over":[190],"99%":[191],"accuracy.":[192],"No":[193],"observed":[197],"correct":[199],"runs":[200],"taking":[201],"66":[202],"hours":[203,211],"scale":[205,213],"256":[207],"processes":[208],"39.7":[210],"1024":[215],"accurately":[218],"reports":[219],"hangs.":[226]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2017-11-17T00:00:00"}
