{"id":"https://openalex.org/W1995565928","doi":"https://doi.org/10.1145/1838574.1838583","title":"End-to-end framework for fault management for open source clusters","display_name":"End-to-end framework for fault management for open source clusters","publication_year":2010,"publication_date":"2010-08-02","ids":{"openalex":"https://openalex.org/W1995565928","doi":"https://doi.org/10.1145/1838574.1838583","mag":"1995565928"},"language":"en","primary_location":{"id":"doi:10.1145/1838574.1838583","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838574.1838583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 TeraGrid Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019468716","display_name":"John L. Hammond","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"John L. Hammond","raw_affiliation_strings":["ICES, University of Texas, Austin, Texas","ICES, University of Texas, Austin, Texas#TAB#"],"affiliations":[{"raw_affiliation_string":"ICES, University of Texas, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"ICES, University of Texas, Austin, Texas#TAB#","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088884655","display_name":"Tommy Minyard","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tommy Minyard","raw_affiliation_strings":["TACC, University of Texas, Austin, Texas","TACC, University of Texas, Austin, Texas#TAB#"],"affiliations":[{"raw_affiliation_string":"TACC, University of Texas, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"TACC, University of Texas, Austin, Texas#TAB#","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059972792","display_name":"J. C. Browne","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jim Browne","raw_affiliation_strings":["University of Texas, Austin, Texas"],"affiliations":[{"raw_affiliation_string":"University of Texas, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5019468716"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":1.7806,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.84893693,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7032486200332642},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6938004493713379},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.6495130062103271},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5335742831230164},{"id":"https://openalex.org/keywords/fault-management","display_name":"Fault management","score":0.5222228765487671},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5111809372901917},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4857361912727356},{"id":"https://openalex.org/keywords/open-source-software","display_name":"Open source software","score":0.4714280962944031},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.4368622899055481},{"id":"https://openalex.org/keywords/fault","display_name":"Fault (geology)","score":0.42208749055862427},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18436706066131592}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7032486200332642},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6938004493713379},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.6495130062103271},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5335742831230164},{"id":"https://openalex.org/C108074857","wikidata":"https://www.wikidata.org/wiki/Q3067360","display_name":"Fault management","level":3,"score":0.5222228765487671},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5111809372901917},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4857361912727356},{"id":"https://openalex.org/C2988343187","wikidata":"https://www.wikidata.org/wiki/Q1130645","display_name":"Open source software","level":3,"score":0.4714280962944031},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4368622899055481},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.42208749055862427},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18436706066131592},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1838574.1838583","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838574.1838583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 TeraGrid Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.44999998807907104}],"awards":[{"id":"https://openalex.org/G2691359754","display_name":null,"funder_award_id":"622780","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W130470821","https://openalex.org/W150560166","https://openalex.org/W306338726","https://openalex.org/W1486832445","https://openalex.org/W1507189627","https://openalex.org/W1539949561","https://openalex.org/W1589857376","https://openalex.org/W1955571710","https://openalex.org/W1992713698","https://openalex.org/W2031188261","https://openalex.org/W2032858539","https://openalex.org/W2048327004","https://openalex.org/W2053691501","https://openalex.org/W2094924503","https://openalex.org/W2098695822","https://openalex.org/W2107263349","https://openalex.org/W2111065486","https://openalex.org/W2136159049","https://openalex.org/W2138916466","https://openalex.org/W2143220335","https://openalex.org/W2145864256","https://openalex.org/W2158907675","https://openalex.org/W2611503582","https://openalex.org/W6635048552","https://openalex.org/W6674809358","https://openalex.org/W7006227833"],"related_works":["https://openalex.org/W4376877853","https://openalex.org/W1493891899","https://openalex.org/W3005935371","https://openalex.org/W4250928611","https://openalex.org/W166480398","https://openalex.org/W1612808768","https://openalex.org/W167327709","https://openalex.org/W1977393088","https://openalex.org/W4387839566","https://openalex.org/W4210922265"],"abstract_inverted_index":{"The":[0,82],"scale":[1],"and":[2,7,22,41,51,98,101,134],"complexity":[3],"of":[4,20,84,105,127],"both":[5],"hardware":[6],"software":[8,13,79],"on":[9,71,145],"large":[10],"open":[11,64,77],"source":[12,65,78],"systems":[14],"such":[15],"as":[16],"Ranger":[17],"make":[18],"occurrence":[19],"faults":[21],"failures":[23,44],"inevitable.":[24],"What":[25],"is":[26,29,68],"not":[27],"inevitable":[28],"that":[30,39],"they":[31],"should":[32,45],"be":[33,48],"allowed":[34],"to":[35,47,116],"go":[36],"undetected,":[37],"nor":[38],"diagnosis":[40],"recovery":[42],"from":[43],"continue":[46],"largely":[49],"manual":[50],"effort":[52],"intensive.":[53],"This":[54,108],"paper":[55,109],"presents":[56,113],"a":[57,88,102,141],"framework":[58,86,129],"for":[59,63,93,137,143],"end-to-end":[60],"fault":[61],"management":[62],"clusters":[66],"which":[67,74,120,130],"being":[69],"developed":[70],"Ranger,":[72],"but":[73],"targets":[75],"general":[76],"based":[80],"clusters.":[81],"elements":[83,126],"the":[85,114,118,125,128,135,146],"are:":[87],"rationalized":[89],"system":[90],"logging":[91],"stack":[92],"Linux,":[94],"low":[95],"overhead":[96],"log":[97],"status":[99],"monitoring,":[100],"multilevel":[103],"suite":[104],"diagnostic":[106],"analyses.":[107],"describes":[110],"this":[111],"framework,":[112],"accomplishments":[115],"date,":[117],"results":[119],"have":[121],"been":[122],"obtained":[123],"with":[124],"are":[131],"in":[132],"place,":[133],"plans":[136],"future":[138],"development":[139],"including":[140],"solicitation":[142],"collaboration":[144],"project.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
