{"id":"https://openalex.org/W3147521363","doi":"https://doi.org/10.1109/e-science.2006.261111","title":"Job Failure Analysis and Its Implications in a Large-Scale Production Grid","display_name":"Job Failure Analysis and Its Implications in a Large-Scale Production Grid","publication_year":2006,"publication_date":"2006-12-01","ids":{"openalex":"https://openalex.org/W3147521363","doi":"https://doi.org/10.1109/e-science.2006.261111","mag":"3147521363"},"language":"en","primary_location":{"id":"doi:10.1109/e-science.2006.261111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/e-science.2006.261111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2006 Second IEEE International Conference on e-Science and Grid Computing (e-Science'06)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101946485","display_name":"Hui Li","orcid":"https://orcid.org/0000-0003-1841-9144"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Hui Li","raw_affiliation_strings":["Leiden Institute of Advanced Computer Science (LIACS), Leiden University, Leiden, Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden Institute of Advanced Computer Science (LIACS), Leiden University, Leiden, Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090946053","display_name":"David Groep","orcid":"https://orcid.org/0000-0003-1026-6606"},"institutions":[{"id":"https://openalex.org/I4210093566","display_name":"National Institute for Subatomic Physics","ror":"https://ror.org/00f9tz983","country_code":"NL","type":"facility","lineage":["https://openalex.org/I2800991832","https://openalex.org/I4210093566"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"David Groep","raw_affiliation_strings":["National Institute of Nuclear Physics and High Energy Physics, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"National Institute of Nuclear Physics and High Energy Physics, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210093566"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091634957","display_name":"Lex Wolters","orcid":null},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Lex Wolters","raw_affiliation_strings":["Leiden Institute of Advanced Computer Science (LIACS), Leiden University, Leiden, Netherlands"],"affiliations":[{"raw_affiliation_string":"Leiden Institute of Advanced Computer Science (LIACS), Leiden University, Leiden, Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051876723","display_name":"J. A. Templon","orcid":"https://orcid.org/0000-0002-3371-788X"},"institutions":[{"id":"https://openalex.org/I4210093566","display_name":"National Institute for Subatomic Physics","ror":"https://ror.org/00f9tz983","country_code":"NL","type":"facility","lineage":["https://openalex.org/I2800991832","https://openalex.org/I4210093566"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jeff Templon","raw_affiliation_strings":["National Institute of Nuclear Physics and High Energy Physics, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"National Institute of Nuclear Physics and High Energy Physics, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210093566"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101946485"],"corresponding_institution_ids":["https://openalex.org/I121797337"],"apc_list":null,"apc_paid":null,"fwci":1.5089,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.8582768,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"2221","issue":null,"first_page":"27","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.675906240940094},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6690943837165833},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.5676953196525574},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5044890642166138},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4976542294025421},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.49482911825180054},{"id":"https://openalex.org/keywords/grid-computing","display_name":"Grid computing","score":0.48902687430381775},{"id":"https://openalex.org/keywords/life-span","display_name":"Life span","score":0.4703747630119324},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4453568756580353},{"id":"https://openalex.org/keywords/failure-rate","display_name":"Failure rate","score":0.4366976022720337},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.3632147014141083},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.3330274820327759},{"id":"https://openalex.org/keywords/operations-management","display_name":"Operations management","score":0.19120317697525024},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15428343415260315},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11334562301635742},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.10832378268241882},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.09305638074874878},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09150341153144836}],"concepts":[{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.675906240940094},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6690943837165833},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.5676953196525574},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5044890642166138},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4976542294025421},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.49482911825180054},{"id":"https://openalex.org/C70429105","wikidata":"https://www.wikidata.org/wiki/Q249999","display_name":"Grid computing","level":3,"score":0.48902687430381775},{"id":"https://openalex.org/C2988516024","wikidata":"https://www.wikidata.org/wiki/Q441195","display_name":"Life span","level":2,"score":0.4703747630119324},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4453568756580353},{"id":"https://openalex.org/C163164238","wikidata":"https://www.wikidata.org/wiki/Q2737027","display_name":"Failure rate","level":2,"score":0.4366976022720337},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.3632147014141083},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3330274820327759},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.19120317697525024},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15428343415260315},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11334562301635742},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.10832378268241882},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.09305638074874878},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09150341153144836},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C74909509","wikidata":"https://www.wikidata.org/wiki/Q10387","display_name":"Gerontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/e-science.2006.261111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/e-science.2006.261111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2006 Second IEEE International Conference on e-Science and Grid Computing (e-Science'06)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320283","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1513326124","https://openalex.org/W1602812726","https://openalex.org/W1765276664","https://openalex.org/W2060416214","https://openalex.org/W2081780581","https://openalex.org/W2100785383","https://openalex.org/W2147176980","https://openalex.org/W2160821994","https://openalex.org/W2166308375","https://openalex.org/W3162989687","https://openalex.org/W6635916538","https://openalex.org/W6638048240","https://openalex.org/W6671134517","https://openalex.org/W6681675132","https://openalex.org/W6684448063","https://openalex.org/W7066196797"],"related_works":["https://openalex.org/W44553394","https://openalex.org/W2481141926","https://openalex.org/W2104023905","https://openalex.org/W2156309142","https://openalex.org/W1982114871","https://openalex.org/W2004018931","https://openalex.org/W2396704942","https://openalex.org/W2081571870","https://openalex.org/W2755738839","https://openalex.org/W2136505494"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3,23,201],"present":[4],"an":[5],"initial":[6],"analysis":[7,41],"of":[8,31,75,121,130,144,155,168,181],"job":[9,57,76],"failures":[10,58,170],"in":[11,21,60,110,221],"a":[12,137,165],"large-scale":[13],"data-intensive":[14],"Grid.":[15],"Based":[16,196],"on":[17,136,197],"three":[18],"representative":[19],"periods":[20],"production,":[22],"characterize":[24],"the":[25,40,47,61,89,99,106,111,131,190,198,208],"interarrival":[26,178],"times":[27],"and":[28,39,80,92,164,189,216,227],"life":[29,73,191],"spans":[30,74,192],"failed":[32,133,145,156,182],"jobs.":[33,124],"Different":[34],"failure":[35,90,108,199,215],"types":[36],"are":[37,78,113,147,171,184,230],"distinguished":[38],"is":[42,63,205],"carried":[43],"out":[44],"further":[45,83],"at":[46,98],"Virtual":[48],"Organization":[49],"(VO)":[50],"level.":[51,101],"The":[52,142,177],"spatial":[53],"behavior,":[54],"namely":[55],"where":[56],"occur":[59],"Grid,":[62],"also":[64,231],"examined.":[65],"Cross-correlation":[66],"structures,":[67],"including":[68],"how":[69],"arrivals":[70],"correlate":[71],"with":[72],"failures,":[77],"analyzed":[79],"illustrated.":[81],"We":[82],"investigate":[84],"statistical":[85],"models":[86],"to":[87,119,128,186,212],"fit":[88],"data":[91],"propose":[93],"several":[94,160,174],"failureaware":[95],"scheduling":[96],"strategies":[97],"Grid":[100,112,209],"Our":[102],"results":[103],"show":[104],"that":[105,203],"overall":[107],"rates":[109],"quite":[114],"significant,":[115],"ranging":[116],"from":[117,159],"25%":[118],"33%":[120],"all":[122],"submitted":[123],"However,":[125],"only":[126],"5%":[127],"8%":[129],"jobs":[132,146,157,183],"after":[134],"running":[135],"certain":[138],"Computing":[139],"Element":[140],"(CE).":[141],"rest":[143],"aborted":[148],"or":[149],"cancelled":[150],"without":[151],"running.":[152],"A":[153],"majority":[154],"come":[158],"large":[161,166],"production":[162],"VOs":[163],"amount":[167],"these":[169],"centered":[172],"around":[173],"main":[175],"CEs.":[176],"time":[179],"processes":[180],"shown":[185],"be":[187],"bursty,":[188],"exhibit":[193],"strong":[194],"autocorrelations.":[195],"patterns":[200],"argue":[202],"it":[204,218],"important":[206],"for":[207],"resource":[210],"brokers":[211],"track":[213],"historical":[214],"take":[217],"into":[219],"account":[220],"decision":[222],"making.":[223],"Some":[224],"proactive":[225],"measures":[226],"accountability":[228],"issues":[229],"discussed.":[232]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
