{"id":"https://openalex.org/W2075817882","doi":"https://doi.org/10.1145/2701126.2701214","title":"A highly available distributed self-scheduler for exascale computing","display_name":"A highly available distributed self-scheduler for exascale computing","publication_year":2015,"publication_date":"2015-01-08","ids":{"openalex":"https://openalex.org/W2075817882","doi":"https://doi.org/10.1145/2701126.2701214","mag":"2075817882"},"language":"en","primary_location":{"id":"doi:10.1145/2701126.2701214","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2701126.2701214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Ubiquitous Information Management and Communication","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057931026","display_name":"Atsuko Takefusa","orcid":"https://orcid.org/0000-0003-0785-0131"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Atsuko Takefusa","raw_affiliation_strings":["Industrial Science and Technology (AIST)"],"affiliations":[{"raw_affiliation_string":"Industrial Science and Technology (AIST)","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027931271","display_name":"Hidemoto Nakada","orcid":"https://orcid.org/0000-0002-8901-2504"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hidemoto Nakada","raw_affiliation_strings":["Industrial Science and Technology (AIST)"],"affiliations":[{"raw_affiliation_string":"Industrial Science and Technology (AIST)","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004964792","display_name":"Tsutomu Ikegami","orcid":"https://orcid.org/0000-0003-2977-6390"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tsutomu Ikegami","raw_affiliation_strings":["Industrial Science and Technology (AIST)"],"affiliations":[{"raw_affiliation_string":"Industrial Science and Technology (AIST)","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103206634","display_name":"Yoshio Tanaka","orcid":"https://orcid.org/0000-0002-5079-4377"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshio Tanaka","raw_affiliation_strings":["Industrial Science and Technology (AIST)"],"affiliations":[{"raw_affiliation_string":"Industrial Science and Technology (AIST)","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057931026"],"corresponding_institution_ids":["https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":0.7946,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80884027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8472791910171509},{"id":"https://openalex.org/keywords/exascale-computing","display_name":"Exascale computing","score":0.8363990783691406},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7786850929260254},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7738765478134155},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.7163093090057373},{"id":"https://openalex.org/keywords/middleware","display_name":"Middleware (distributed applications)","score":0.5894387364387512},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.5603729486465454},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.48821380734443665},{"id":"https://openalex.org/keywords/mean-time-between-failures","display_name":"Mean time between failures","score":0.4873214662075043},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.40034475922584534},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3303235173225403},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.32003527879714966},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3152869939804077},{"id":"https://openalex.org/keywords/failure-rate","display_name":"Failure rate","score":0.06754270195960999},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.05740204453468323}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8472791910171509},{"id":"https://openalex.org/C2778837361","wikidata":"https://www.wikidata.org/wiki/Q2450880","display_name":"Exascale computing","level":3,"score":0.8363990783691406},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7786850929260254},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7738765478134155},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.7163093090057373},{"id":"https://openalex.org/C169468491","wikidata":"https://www.wikidata.org/wiki/Q146923","display_name":"Middleware (distributed applications)","level":2,"score":0.5894387364387512},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.5603729486465454},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.48821380734443665},{"id":"https://openalex.org/C44154001","wikidata":"https://www.wikidata.org/wiki/Q754940","display_name":"Mean time between failures","level":3,"score":0.4873214662075043},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.40034475922584534},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3303235173225403},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.32003527879714966},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3152869939804077},{"id":"https://openalex.org/C163164238","wikidata":"https://www.wikidata.org/wiki/Q2737027","display_name":"Failure rate","level":2,"score":0.06754270195960999},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.05740204453468323},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2701126.2701214","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2701126.2701214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Ubiquitous Information Management and Communication","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2036527145","https://openalex.org/W2095148640","https://openalex.org/W2105524676","https://openalex.org/W2152538969","https://openalex.org/W2161292693","https://openalex.org/W2168217690","https://openalex.org/W2530248785","https://openalex.org/W2883639715","https://openalex.org/W2953048275"],"related_works":["https://openalex.org/W4318068934","https://openalex.org/W2290952066","https://openalex.org/W4320480730","https://openalex.org/W2056528600","https://openalex.org/W4297752063","https://openalex.org/W3128098196","https://openalex.org/W2572977084","https://openalex.org/W1989530240","https://openalex.org/W2346596560","https://openalex.org/W4285081369"],"abstract_inverted_index":{"A":[0],"hierarchical":[1],"master-worker":[2],"model":[3],"is":[4,20,37,135],"thought":[5],"to":[6,39,83],"be":[7,40,150,156],"a":[8,44,58,64,93],"promising":[9],"programming":[10],"paradigm":[11],"for":[12,27,49,68,122],"exascale-level":[13],"high":[14],"performance":[15],"computers.":[16],"However,":[17],"\"fault":[18],"resiliency\"":[19],"one":[21],"of":[22,78,96],"the":[23,31,69,97,107,113,118,128,132,143,153],"most":[24],"important":[25],"issues":[26],"exascale":[28,50],"computing":[29,51],"because":[30],"Mean":[32],"Time":[33],"Between":[34],"Failure":[35],"(MTBF)":[36],"expected":[38],"short.":[41],"We":[42,90,139],"propose":[43],"fault":[45,86,120,137],"resilient":[46],"middleware":[47,71],"suite":[48],"environments.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56],"design":[57],"highly":[59],"available":[60],"distributed":[61,75,115,147],"self-scheduler":[62,76,116],"as":[63],"resource":[65],"management":[66],"system":[67,95,110],"proposed":[70,74,114],"suite.":[72],"The":[73],"consists":[77],"multiple":[79],"processes":[80],"in":[81],"order":[82],"achieve":[84],"scalability,":[85],"resiliency,":[87],"and":[88,102,130,152],"persistency.":[89],"also":[91,136,140],"develop":[92],"prototype":[94,109],"middleware,":[98,129],"using":[99,106,127],"Apache":[100,103],"ZooKeeper":[101],"Cassandra.":[104],"Experiments":[105],"developed":[108,126],"show":[111],"that":[112,131,142],"achieves":[117],"desired":[119],"resiliency":[121],"an":[123],"application":[124],"program":[125],"scheduler":[133,154],"itself":[134],"resilient.":[138],"confirmed":[141],"overheads":[144],"caused":[145],"by":[146],"processing":[148],"can":[149,155],"reduced,":[151],"scalable.":[157]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
