{"id":"https://openalex.org/W4410887731","doi":"https://doi.org/10.1109/syscon64521.2025.11014800","title":"FCS: A Fault-Tolerance Asynchronous Federated Learning System with Model Checkpoint Storage","display_name":"FCS: A Fault-Tolerance Asynchronous Federated Learning System with Model Checkpoint Storage","publication_year":2025,"publication_date":"2025-04-07","ids":{"openalex":"https://openalex.org/W4410887731","doi":"https://doi.org/10.1109/syscon64521.2025.11014800"},"language":"en","primary_location":{"id":"doi:10.1109/syscon64521.2025.11014800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/syscon64521.2025.11014800","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International systems Conference (SysCon)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5117770708","display_name":"Norwich Mungkalaton","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Norwich Mungkalaton","raw_affiliation_strings":["School of Computer Science, The University of Sydney,NSW,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney,NSW,Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074021106","display_name":"Songpon Srisawai","orcid":null},"institutions":[{"id":"https://openalex.org/I82828225","display_name":"King Mongkut's University of Technology North Bangkok","ror":"https://ror.org/04fy6jb97","country_code":"TH","type":"education","lineage":["https://openalex.org/I82828225"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Songpon Srisawai","raw_affiliation_strings":["TGGS, King Mongkut&#x0027;s University of Technology North Bangkok,Bangkok,Thailand"],"affiliations":[{"raw_affiliation_string":"TGGS, King Mongkut&#x0027;s University of Technology North Bangkok,Bangkok,Thailand","institution_ids":["https://openalex.org/I82828225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068857843","display_name":"Yodsawalai Chodpathumwan","orcid":null},"institutions":[{"id":"https://openalex.org/I82828225","display_name":"King Mongkut's University of Technology North Bangkok","ror":"https://ror.org/04fy6jb97","country_code":"TH","type":"education","lineage":["https://openalex.org/I82828225"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Yodsawalai Chodpathumwan","raw_affiliation_strings":["TGGS, King Mongkut&#x0027;s University of Technology North Bangkok,Bangkok,Thailand"],"affiliations":[{"raw_affiliation_string":"TGGS, King Mongkut&#x0027;s University of Technology North Bangkok,Bangkok,Thailand","institution_ids":["https://openalex.org/I82828225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055460914","display_name":"Wei Li","orcid":"https://orcid.org/0000-0001-8446-5427"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["School of Computer Science, The University of Sydney,NSW,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney,NSW,Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015993565","display_name":"Albert Y. Zomaya","orcid":"https://orcid.org/0000-0002-3090-1059"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Albert Zomaya","raw_affiliation_strings":["School of Computer Science, The University of Sydney,NSW,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney,NSW,Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5117770708"],"corresponding_institution_ids":["https://openalex.org/I129604602"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14918789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.768463671207428},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.6445540189743042},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.6142482757568359},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.34675687551498413},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.33522462844848633},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.33314573764801025},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.17546036839485168}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.768463671207428},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6445540189743042},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.6142482757568359},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34675687551498413},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.33522462844848633},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.33314573764801025},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.17546036839485168}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/syscon64521.2025.11014800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/syscon64521.2025.11014800","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International systems Conference (SysCon)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2083842231","https://openalex.org/W2133376447","https://openalex.org/W2978709794","https://openalex.org/W3103989228","https://openalex.org/W3154608090","https://openalex.org/W3213145617","https://openalex.org/W4286888406","https://openalex.org/W4308426171","https://openalex.org/W4309872119","https://openalex.org/W4312328698","https://openalex.org/W4318619660","https://openalex.org/W4385270372","https://openalex.org/W6756756286","https://openalex.org/W6759226220","https://openalex.org/W6759238902","https://openalex.org/W6781175393","https://openalex.org/W6787673396","https://openalex.org/W6796096428"],"related_works":["https://openalex.org/W2116677773","https://openalex.org/W2155261584","https://openalex.org/W2584231425","https://openalex.org/W2144438995","https://openalex.org/W1577886464","https://openalex.org/W2164896586","https://openalex.org/W1593559483","https://openalex.org/W2108334564","https://openalex.org/W2111125783","https://openalex.org/W2100367016"],"abstract_inverted_index":{"Federated":[0,65],"Learning":[1],"(FL)":[2],"is":[3,71],"a":[4,72,196],"decentralized":[5],"machine":[6],"learning":[7],"framework":[8],"that":[9,76,135,156,192],"enables":[10],"large-scale":[11],"training":[12,45,53,102,122,173,199],"across":[13],"multiple":[14],"devices":[15,27],"while":[16],"maintaining":[17,40],"strong":[18],"privacy":[19],"guarantees.":[20],"However,":[21],"as":[22],"the":[23,35,57,64,90,95,100,107,112,116,119,127,136,157,162,167,171],"number":[24],"of":[25,94,118,170,205],"participating":[26],"increases,":[28],"communication":[29,42,60,92,108,151],"heterogeneity":[30],"creates":[31],"significant":[32],"challenges":[33],"for":[34,47,201],"aggregation":[36,82,131],"server,":[37],"especially":[38],"in":[39,149],"reliable":[41],"and":[43,55,97,123,144,166,177],"guaranteeing":[44],"progress":[46],"all":[48],"devices.":[49],"To":[50],"ensure":[51],"FL":[52,80,101,172,184,198],"stability":[54],"relax":[56],"strict":[58,91],"synchronous":[59,183],"requirements,":[61],"we":[62,190],"propose":[63],"Check-point":[66],"Storage":[67],"(FCS)":[68],"protocol.":[69],"FCS":[70,105,137,158,193],"message-free":[73],"coordination":[74],"protocol":[75,138,159],"supports":[77],"fully":[78],"asynchronous":[79],"model":[81,85,130],"by":[83,111],"leveraging":[84],"checkpointing":[86],"storage":[87],"to":[88,114,181],"decouple":[89],"dependency":[93],"device":[96,142,188,203],"server":[98,113],"during":[99],"process.":[103,132],"Specifically,":[104],"isolates":[106],"instructions":[109],"required":[110],"manage":[115],"availability":[117,143],"device's":[120],"local":[121],"weight":[124],"submission":[125],"from":[126],"server's":[128],"global":[129],"We":[133],"show":[134,191],"better":[139],"handles":[140],"intermittent":[141,187],"discuss":[145],"its":[146],"robustness":[147],"property":[148],"avoiding":[150],"deadlock.":[152],"Experiment":[153],"results":[154],"demonstrate":[155],"improves":[160],"both":[161],"round-to-accuracy":[163],"convergence":[164],"rates":[165],"time-to-target":[168],"accuracy":[169],"tasks":[174],"on":[175],"FEMNIST":[176],"CIFAR-10":[178],"datasets.":[179],"Compared":[180],"state-of-the-art":[182],"protocols":[185],"under":[186],"availability,":[189],"could":[194],"be":[195],"robust":[197],"system":[200],"heterogeneous":[202],"networks":[204],"any":[206],"scale.":[207]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
