{"id":"https://openalex.org/W2162342269","doi":"https://doi.org/10.1109/dsnw.2013.6615513","title":"Predicting job completion times using system logs in supercomputing clusters","display_name":"Predicting job completion times using system logs in supercomputing clusters","publication_year":2013,"publication_date":"2013-06-01","ids":{"openalex":"https://openalex.org/W2162342269","doi":"https://doi.org/10.1109/dsnw.2013.6615513","mag":"2162342269"},"language":"en","primary_location":{"id":"doi:10.1109/dsnw.2013.6615513","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsnw.2013.6615513","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 43rd Annual IEEE/IFIP Conference on Dependable Systems and Networks Workshop (DSN-W)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022191974","display_name":"Xin Chen","orcid":"https://orcid.org/0000-0003-3459-8912"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Xin Chen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of British Columbia, Vancouver, V6T1Z4, Canada","Department of Electrical and Computer Engineering, University of British Columbia, Vancouver, BC, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of British Columbia, Vancouver, V6T1Z4, Canada","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of British Columbia, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100882715","display_name":"Charng\u2010Da Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Charng-Da Lu","raw_affiliation_strings":["Buffalo, NY 14214, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Buffalo, NY 14214, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073641368","display_name":"Karthik Pattabiraman","orcid":"https://orcid.org/0000-0003-2380-3415"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Karthik Pattabiraman","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of British Columbia, Vancouver, V6T1Z4, Canada","Department of Electrical and Computer Engineering, University of British Columbia, Vancouver, BC, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of British Columbia, Vancouver, V6T1Z4, Canada","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of British Columbia, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I141945490"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2636,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.89867037,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.772553563117981},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7667032480239868},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.7351199984550476},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.7251839637756348},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6681139469146729},{"id":"https://openalex.org/keywords/fault","display_name":"Fault (geology)","score":0.4534873366355896},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4281045198440552},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.37919047474861145},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3504179120063782},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2216435670852661},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15899285674095154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.772553563117981},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7667032480239868},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.7351199984550476},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.7251839637756348},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6681139469146729},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.4534873366355896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4281045198440552},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.37919047474861145},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3504179120063782},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2216435670852661},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15899285674095154},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dsnw.2013.6615513","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsnw.2013.6615513","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 43rd Annual IEEE/IFIP Conference on Dependable Systems and Networks Workshop (DSN-W)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W35047313","https://openalex.org/W135621451","https://openalex.org/W1493147916","https://openalex.org/W1536508037","https://openalex.org/W1965626898","https://openalex.org/W2027380800","https://openalex.org/W2095157729","https://openalex.org/W2102738873","https://openalex.org/W2106067409","https://openalex.org/W2107263349","https://openalex.org/W2119018856","https://openalex.org/W2125838338","https://openalex.org/W2136159049","https://openalex.org/W2143996186","https://openalex.org/W2145864256","https://openalex.org/W2147176980","https://openalex.org/W2158907675","https://openalex.org/W3149060731","https://openalex.org/W4249923483","https://openalex.org/W6601359990","https://openalex.org/W6605488818","https://openalex.org/W6629450101","https://openalex.org/W6632233742","https://openalex.org/W6674391746","https://openalex.org/W6681675132"],"related_works":["https://openalex.org/W2384867379","https://openalex.org/W2329539859","https://openalex.org/W2227905990","https://openalex.org/W2765823764","https://openalex.org/W3214280620","https://openalex.org/W3191490922","https://openalex.org/W2794038527","https://openalex.org/W2151092287","https://openalex.org/W2523801036","https://openalex.org/W3157768780"],"abstract_inverted_index":{"Most":[0],"large":[1],"systems":[2],"such":[3],"as":[4],"HPC/cloud":[5],"computing":[6],"clusters":[7],"and":[8,51,68],"data":[9],"centers":[10],"are":[11,19],"built":[12],"from":[13,76],"commercial":[14],"off-the-shelf":[15],"components.":[16],"System":[17],"logs":[18,35],"usually":[20],"the":[21,30,47,86,94,106],"main":[22],"source":[23],"of":[24,49,99,129,136],"choice":[25],"to":[26,36,46,120],"gain":[27],"insights":[28],"into":[29],"system":[31,96],"issues.":[32],"Therefore,":[33],"mining":[34],"diagnose":[37],"anomalies":[38],"has":[39],"been":[40],"an":[41,64,71,134],"active":[42],"research":[43],"area.":[44],"Due":[45],"lack":[48],"organization":[50],"semantic":[52],"consistency":[53],"in":[54],"commodity":[55],"PC":[56],"clusters'":[57],"logs,":[58],"what":[59],"constitutes":[60],"a":[61,90,100],"fault":[62],"or":[63],"error":[65,135],"is":[66,79],"subjective":[67],"thus":[69],"building":[70],"automatic":[72],"failure":[73],"prediction":[74],"model":[75],"log":[77,97],"messages":[78,98],"hard.":[80],"In":[81],"this":[82],"paper":[83],"we":[84,104],"sidestep":[85],"difficulty":[87],"by":[88],"asking":[89],"different":[91],"question:":[92],"Given":[93],"concomitant":[95],"running":[101],"job,":[102],"can":[103,126],"predict":[105,127],"job's":[107],"remaining":[108,131],"time?":[109],"We":[110],"adopt":[111],"Hidden":[112],"Markov":[113],"Model":[114],"(HMM)":[115],"coupled":[116],"with":[117,133],"frequency":[118],"analysis":[119],"achieve":[121],"this.":[122],"Our":[123],"HMM":[124],"approach":[125],"75%":[128],"jobs'":[130],"times":[132],"less":[137],"than":[138],"200":[139],"seconds.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
