{"id":"https://openalex.org/W1992135271","doi":"https://doi.org/10.1145/2503210.2503237","title":"Distributed wait state tracking for runtime MPI deadlock detection","display_name":"Distributed wait state tracking for runtime MPI deadlock detection","publication_year":2013,"publication_date":"2013-10-30","ids":{"openalex":"https://openalex.org/W1992135271","doi":"https://doi.org/10.1145/2503210.2503237","mag":"1992135271"},"language":"en","primary_location":{"id":"doi:10.1145/2503210.2503237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2503210.2503237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034838586","display_name":"Tobias Hilbrich","orcid":null},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Tobias Hilbrich","raw_affiliation_strings":["Technische Universit\u00e4t, Dresden, Germany","Technische Universitat Dresden, Germany#TAB#"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Technische Universitat Dresden, Germany#TAB#","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058719424","display_name":"Bronis R. de Supinski","orcid":"https://orcid.org/0000-0002-0339-1006"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bronis R. de Supinski","raw_affiliation_strings":["Lawrence Livermore National Laboratory, Livermore, CA"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, Livermore, CA","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040346332","display_name":"Wolfgang E. Nagel","orcid":null},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wolfgang E. Nagel","raw_affiliation_strings":["Technische Universit\u00e4t, Dresden, Germany","Technische Universitat Dresden, Germany#TAB#"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Technische Universitat Dresden, Germany#TAB#","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109841789","display_name":"Joachim Protze","orcid":null},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Joachim Protze","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany","RWTH-Aachen University, Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"RWTH-Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091344667","display_name":"Christel Baier","orcid":"https://orcid.org/0000-0002-5321-9343"},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christel Baier","raw_affiliation_strings":["Technische Universit\u00e4t, Dresden, Germany","Technische Universitat Dresden, Germany#TAB#"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Technische Universitat Dresden, Germany#TAB#","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108339675","display_name":"Matthias M\u00fcller","orcid":"https://orcid.org/0009-0005-8772-7775"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Matthias S. M\u00fcller","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany","RWTH-Aachen University, Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"RWTH-Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5034838586"],"corresponding_institution_ids":["https://openalex.org/I78650965"],"apc_list":null,"apc_paid":null,"fwci":2.6446,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.90370426,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"33","issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10933","display_name":"Real-Time Systems Scheduling","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8897788524627686},{"id":"https://openalex.org/keywords/deadlock-prevention-algorithms","display_name":"Deadlock prevention algorithms","score":0.8757338523864746},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7525968551635742},{"id":"https://openalex.org/keywords/deadlock","display_name":"Deadlock","score":0.7266641855239868},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6489412188529968},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.629197359085083},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5916308164596558},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.5119389295578003},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5054640769958496},{"id":"https://openalex.org/keywords/timeout","display_name":"Timeout","score":0.5047374963760376},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.4124566614627838},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40388575196266174},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16171231865882874},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13446450233459473},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10605663061141968},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09947994351387024}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8897788524627686},{"id":"https://openalex.org/C113429609","wikidata":"https://www.wikidata.org/wiki/Q4060699","display_name":"Deadlock prevention algorithms","level":3,"score":0.8757338523864746},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7525968551635742},{"id":"https://openalex.org/C159023740","wikidata":"https://www.wikidata.org/wiki/Q623276","display_name":"Deadlock","level":2,"score":0.7266641855239868},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6489412188529968},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.629197359085083},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5916308164596558},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.5119389295578003},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5054640769958496},{"id":"https://openalex.org/C31691690","wikidata":"https://www.wikidata.org/wiki/Q1753979","display_name":"Timeout","level":2,"score":0.5047374963760376},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.4124566614627838},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40388575196266174},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16171231865882874},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13446450233459473},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10605663061141968},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09947994351387024},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2503210.2503237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2503210.2503237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:publications.rwth-aachen.de:227787","is_oa":false,"landing_page_url":"http://publications.rwth-aachen.de/record/227787","pdf_url":null,"source":{"id":"https://openalex.org/S4306401033","display_name":"RWTH Publications (RWTH Aachen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887968799","host_organization_name":"RWTH Aachen University","host_organization_lineage":["https://openalex.org/I887968799"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"SC '13 Proceedings of SC13 : International Conference for High Performance Computing, Networking, Storage and Analysis ; Denver, Colo., 17-22 November 2013<br/>SC13 : International Conference for High Performance Computing, Networking, Storage and Analysis, Denver, CO, USA, 2013-11-17 - 2013-11-22","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.550000011920929}],"awards":[{"id":"https://openalex.org/G1784276297","display_name":null,"funder_award_id":"287703","funder_id":"https://openalex.org/F4320334960","funder_display_name":"Seventh Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320334960","display_name":"Seventh Framework Programme","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"},{"id":"https://openalex.org/F4320338304","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W46645073","https://openalex.org/W77419049","https://openalex.org/W1523442991","https://openalex.org/W1568729458","https://openalex.org/W1588846961","https://openalex.org/W1652742168","https://openalex.org/W1948591071","https://openalex.org/W1978562726","https://openalex.org/W1990128020","https://openalex.org/W2023728230","https://openalex.org/W2046183160","https://openalex.org/W2056400780","https://openalex.org/W2069501221","https://openalex.org/W2093709900","https://openalex.org/W2106913893","https://openalex.org/W2116746874","https://openalex.org/W2131053137","https://openalex.org/W2139540534","https://openalex.org/W2149871224","https://openalex.org/W2152419477","https://openalex.org/W2160097679","https://openalex.org/W2160611910","https://openalex.org/W2162543097","https://openalex.org/W2162989857","https://openalex.org/W2166656891","https://openalex.org/W2168225224","https://openalex.org/W2171371822","https://openalex.org/W2561675875","https://openalex.org/W4285719527","https://openalex.org/W4386616824","https://openalex.org/W6601880353","https://openalex.org/W6675805694"],"related_works":["https://openalex.org/W2050076411","https://openalex.org/W2001478969","https://openalex.org/W1542183432","https://openalex.org/W2360686363","https://openalex.org/W1900787600","https://openalex.org/W2136552483","https://openalex.org/W2166954426","https://openalex.org/W1966106371","https://openalex.org/W4233563968","https://openalex.org/W4232938291"],"abstract_inverted_index":{"The":[0],"widely":[1],"used":[2],"Message":[3],"Passing":[4],"Interface":[5],"(MPI)":[6],"with":[7,88],"its":[8,57],"multitude":[9],"of":[10,26,41,67,129,145],"communication":[11],"functions":[12],"is":[13],"prone":[14],"to":[15,63,81,105,123],"usage":[16],"errors.":[17,28],"Runtime":[18],"error":[19],"detection":[20,73,87,116],"tools":[21,68],"aid":[22],"in":[23,51,117],"the":[24,127],"removal":[25],"these":[27],"We":[29],"develop":[30],"MUST":[31],"as":[32],"one":[33],"such":[34],"tool":[35],"that":[36,69,102],"provides":[37],"a":[38,52,64,76,96,107,136],"wide":[39,65],"variety":[40],"automatic":[42],"correctness":[43,46],"checks.":[44],"Its":[45],"checks":[47],"can":[48],"be":[49],"run":[50],"distributed":[53,85,108],"mode,":[54],"except":[55],"for":[56,98,135],"deadlock":[58,86,92,115],"detection.":[59],"This":[60,110],"limitation":[61],"applies":[62],"range":[66],"either":[70],"use":[71,104],"centralized":[72],"algorithms":[74],"or":[75],"timeout":[77],"approach.":[78,131],"In":[79],"order":[80],"provide":[82],"scalable":[83,113],"and":[84],"detailed":[89],"insight":[90],"into":[91],"situations,":[93],"we":[94,103],"propose":[95],"model":[97],"MPI":[99,114],"blocking":[100],"conditions":[101],"formulate":[106],"algorithm.":[109],"algorithm":[111],"implements":[112],"MUST.":[118],"Stress":[119],"tests":[120],"at":[121,147],"up":[122],"4,096":[124],"processes":[125],"demonstrate":[126,140],"scalability":[128],"our":[130],"Finally,":[132],"overhead":[133],"results":[134],"complex":[137],"benchmark":[138],"suite":[139],"an":[141],"average":[142],"runtime":[143],"increase":[144],"34%":[146],"2,048":[148],"processes.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
