{"id":"https://openalex.org/W3137952054","doi":"https://doi.org/10.1109/bigdata50022.2020.9378474","title":"Chiron: Optimizing Fault Tolerance in QoS-aware Distributed Stream Processing Jobs","display_name":"Chiron: Optimizing Fault Tolerance in QoS-aware Distributed Stream Processing Jobs","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3137952054","doi":"https://doi.org/10.1109/bigdata50022.2020.9378474","mag":"3137952054"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9378474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.gla.ac.uk/view/author/66347.html>","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013926492","display_name":"Morgan K. Geldenhuys","orcid":"https://orcid.org/0009-0006-5037-8353"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Morgan K. Geldenhuys","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084056435","display_name":"Lauritz Thamsen","orcid":"https://orcid.org/0000-0003-3755-1503"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lauritz Thamsen","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042349846","display_name":"Odej Kao","orcid":"https://orcid.org/0000-0001-6454-6799"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Odej Kao","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5013926492"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":2.5036,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.92006997,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"434","last_page":"440"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8326464891433716},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.8009527325630188},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6125890016555786},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.5937541723251343},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5246144533157349},{"id":"https://openalex.org/keywords/high-availability","display_name":"High availability","score":0.487662672996521},{"id":"https://openalex.org/keywords/stream-processing","display_name":"Stream processing","score":0.4727367162704468},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4680193364620209},{"id":"https://openalex.org/keywords/software-fault-tolerance","display_name":"Software fault tolerance","score":0.44938933849334717},{"id":"https://openalex.org/keywords/service-quality","display_name":"Service quality","score":0.4250616431236267},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.41639959812164307},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.37938088178634644},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3765566349029541},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3517634868621826},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3234078884124756},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2422429323196411},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08354410529136658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8326464891433716},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.8009527325630188},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6125890016555786},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.5937541723251343},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5246144533157349},{"id":"https://openalex.org/C65813073","wikidata":"https://www.wikidata.org/wiki/Q1622420","display_name":"High availability","level":2,"score":0.487662672996521},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.4727367162704468},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4680193364620209},{"id":"https://openalex.org/C50712370","wikidata":"https://www.wikidata.org/wiki/Q4269346","display_name":"Software fault tolerance","level":3,"score":0.44938933849334717},{"id":"https://openalex.org/C140781008","wikidata":"https://www.wikidata.org/wiki/Q1221081","display_name":"Service quality","level":3,"score":0.4250616431236267},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.41639959812164307},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.37938088178634644},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3765566349029541},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3517634868621826},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3234078884124756},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2422429323196411},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08354410529136658},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9378474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.gla.ac.uk:268151","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/view/author/66347.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"pmh:oai:eprints.gla.ac.uk:268151","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/view/author/66347.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.4699999988079071,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W63848143","https://openalex.org/W1972914000","https://openalex.org/W1981432246","https://openalex.org/W1984564341","https://openalex.org/W1993143336","https://openalex.org/W2033656974","https://openalex.org/W2057369252","https://openalex.org/W2105947650","https://openalex.org/W2124439663","https://openalex.org/W2133046454","https://openalex.org/W2141992894","https://openalex.org/W2163295644","https://openalex.org/W2189465200","https://openalex.org/W2320681231","https://openalex.org/W2566979091","https://openalex.org/W2913854892","https://openalex.org/W3008301576","https://openalex.org/W3016397177","https://openalex.org/W6687322159","https://openalex.org/W6731596640","https://openalex.org/W6776211100"],"related_works":["https://openalex.org/W2347088994","https://openalex.org/W2572716084","https://openalex.org/W4317418912","https://openalex.org/W2971479921","https://openalex.org/W3145923041","https://openalex.org/W2946906624","https://openalex.org/W841176518","https://openalex.org/W2971589461","https://openalex.org/W2363040373","https://openalex.org/W3215034299"],"abstract_inverted_index":{"Fault":[0],"tolerance":[1,38,74,133],"is":[2,56,78,141],"a":[3,79,116],"property":[4],"which":[5,60,98],"needs":[6],"deeper":[7],"consideration":[8],"when":[9],"dealing":[10],"with":[11,97,121,157],"streaming":[12,103],"jobs":[13,77],"requiring":[14],"high":[15],"levels":[16],"of":[17,25,67,118,130],"availability":[18],"and":[19,39,51,70,81,136,147,160],"low-latency":[20],"processing":[21],"even":[22],"in":[23,102],"case":[24],"failures":[26,47],"where":[27],"Quality-of-Service":[28,150],"constraints":[29],"must":[30],"be":[31],"adhered":[32],"to.":[33],"Typically,":[34],"systems":[35],"achieve":[36],"fault":[37,73,132],"the":[40,64,68,95,119,122,128],"ability":[41],"to":[42,126,143],"recover":[43],"automatically":[44,93],"from":[45],"partial":[46],"by":[48],"implementing":[49],"Checkpoint":[50],"Rollback":[52],"Recovery.":[53],"However,":[54],"this":[55,85],"an":[57,90],"expensive":[58],"operation":[59],"impacts":[61],"negatively":[62],"on":[63,134],"overall":[65],"performance":[66,135,145],"system":[69],"manually":[71],"optimizing":[72,94],"for":[75,92],"specific":[76],"difficult":[80],"time":[82],"consuming":[83],"task.In":[84],"paper":[86],"we":[87],"introduce":[88],"Chiron,":[89],"approach":[91],"frequency":[96],"checkpoints":[99],"are":[100,112],"performed":[101],"jobs.":[104],"For":[105],"any":[106],"chosen":[107],"job,":[108],"parallel":[109],"profiling":[110],"runs":[111],"performed,":[113],"each":[114],"containing":[115],"variant":[117],"configurations,":[120],"resulting":[123],"metrics":[124],"used":[125],"model":[127],"impact":[129],"checkpoint-based":[131],"availability.":[137],"Understanding":[138],"these":[139],"relationships":[140],"key":[142],"minimizing":[144],"objectives":[146],"meeting":[148],"strict":[149],"constraints.":[151],"We":[152],"implemented":[153],"Chiron":[154],"prototypically":[155],"together":[156],"Apache":[158],"Flink":[159],"demonstrate":[161],"its":[162],"usefulness":[163],"experimentally.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
