{"id":"https://openalex.org/W4220730878","doi":"https://doi.org/10.1145/3483447","title":"A Study of Failure Recovery and Logging of High-Performance Parallel File Systems","display_name":"A Study of Failure Recovery and Logging of High-Performance Parallel File Systems","publication_year":2022,"publication_date":"2022-03-29","ids":{"openalex":"https://openalex.org/W4220730878","doi":"https://doi.org/10.1145/3483447"},"language":"en","primary_location":{"id":"doi:10.1145/3483447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483447","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3483447","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050327643","display_name":"Runzhou Han","orcid":"https://orcid.org/0000-0003-1440-7568"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Runzhou Han","raw_affiliation_strings":["Iowa State University, Ames, Iowa"],"affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, Iowa","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005793670","display_name":"Om Rameshwar Gatla","orcid":"https://orcid.org/0000-0001-6442-2526"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Om Rameshwar Gatla","raw_affiliation_strings":["Iowa State University, Ames, Iowa"],"affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, Iowa","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004195454","display_name":"Mai Zheng","orcid":"https://orcid.org/0000-0002-0741-3436"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mai Zheng","raw_affiliation_strings":["Iowa State University, Ames, Iowa"],"affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, Iowa","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064790545","display_name":"Jinrui Cao","orcid":"https://orcid.org/0000-0001-6013-1115"},"institutions":[{"id":"https://openalex.org/I1327163397","display_name":"State University of New York","ror":"https://ror.org/01q1z8k08","country_code":"US","type":"education","lineage":["https://openalex.org/I1327163397"]},{"id":"https://openalex.org/I63475719","display_name":"SUNY Plattsburgh","ror":"https://ror.org/033zmj163","country_code":"US","type":"education","lineage":["https://openalex.org/I63475719"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinrui Cao","raw_affiliation_strings":["State University of New York at Plattsburgh, Plattsburgh, New York"],"affiliations":[{"raw_affiliation_string":"State University of New York at Plattsburgh, Plattsburgh, New York","institution_ids":["https://openalex.org/I63475719","https://openalex.org/I1327163397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100366375","display_name":"Di Zhang","orcid":"https://orcid.org/0000-0001-8722-0177"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di Zhang","raw_affiliation_strings":["North Carolina University at Charlotte, Charlotte, North Carolina"],"affiliations":[{"raw_affiliation_string":"North Carolina University at Charlotte, Charlotte, North Carolina","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012002926","display_name":"Dong Dai","orcid":"https://orcid.org/0000-0003-4078-8149"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Dai","raw_affiliation_strings":["North Carolina University at Charlotte, Charlotte, North Carolina"],"affiliations":[{"raw_affiliation_string":"North Carolina University at Charlotte, Charlotte, North Carolina","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100454473","display_name":"Yong Chen","orcid":"https://orcid.org/0009-0001-6525-6299"},"institutions":[{"id":"https://openalex.org/I12315562","display_name":"Texas Tech University","ror":"https://ror.org/0405mnx93","country_code":"US","type":"education","lineage":["https://openalex.org/I12315562"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yong Chen","raw_affiliation_strings":["Texas Tech University, Lubbock, Texas"],"affiliations":[{"raw_affiliation_string":"Texas Tech University, Lubbock, Texas","institution_ids":["https://openalex.org/I12315562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014312938","display_name":"Jonathan Cook","orcid":"https://orcid.org/0000-0003-4907-5719"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Cook","raw_affiliation_strings":["New Mexico State University, Las Cruces, New Mexico"],"affiliations":[{"raw_affiliation_string":"New Mexico State University, Las Cruces, New Mexico","institution_ids":["https://openalex.org/I10052268"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5050327643"],"corresponding_institution_ids":["https://openalex.org/I173911158"],"apc_list":null,"apc_paid":null,"fwci":2.1405,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.87372299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"18","issue":"2","first_page":"1","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lustre","display_name":"Lustre (file system)","score":0.9586149454116821},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8514190316200256},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.582029402256012},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4868130385875702},{"id":"https://openalex.org/keywords/logging","display_name":"Logging","score":0.47855788469314575},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.438056617975235},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3859551250934601},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3253090977668762}],"concepts":[{"id":"https://openalex.org/C180699724","wikidata":"https://www.wikidata.org/wiki/Q1877782","display_name":"Lustre (file system)","level":3,"score":0.9586149454116821},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8514190316200256},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.582029402256012},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4868130385875702},{"id":"https://openalex.org/C125620115","wikidata":"https://www.wikidata.org/wiki/Q845249","display_name":"Logging","level":2,"score":0.47855788469314575},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.438056617975235},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3859551250934601},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3253090977668762},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3483447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483447","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3483447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483447","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2507283498","display_name":null,"funder_award_id":"1943204","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3059875793","display_name":null,"funder_award_id":"1910747","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5050124249","display_name":"SHF:  Small:  Collaborative Research:  Uncovering Vulnerabilities in Parallel File Systems for Reliable High Performance Computing","funder_award_id":"1853714","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8296642885","display_name":null,"funder_award_id":"CNS-1943204","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4220730878.pdf","grobid_xml":"https://content.openalex.org/works/W4220730878.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1444959585","https://openalex.org/W1838389500","https://openalex.org/W1905719725","https://openalex.org/W2002934700","https://openalex.org/W2003131282","https://openalex.org/W2010628428","https://openalex.org/W2023718005","https://openalex.org/W2024993563","https://openalex.org/W2039157918","https://openalex.org/W2043719859","https://openalex.org/W2044819105","https://openalex.org/W2063564815","https://openalex.org/W2073742357","https://openalex.org/W2078186835","https://openalex.org/W2079267582","https://openalex.org/W2096761130","https://openalex.org/W2100307454","https://openalex.org/W2103315535","https://openalex.org/W2106468386","https://openalex.org/W2144992723","https://openalex.org/W2146141344","https://openalex.org/W2154376164","https://openalex.org/W2523146812","https://openalex.org/W2548592627","https://openalex.org/W2564990060","https://openalex.org/W2734941459","https://openalex.org/W2767094836","https://openalex.org/W2774510177","https://openalex.org/W2808563793","https://openalex.org/W2890169599","https://openalex.org/W2894672808","https://openalex.org/W2895690683","https://openalex.org/W2899050079","https://openalex.org/W2903092728","https://openalex.org/W2913598742","https://openalex.org/W2933883078","https://openalex.org/W2979357014","https://openalex.org/W2981593401","https://openalex.org/W2981689050","https://openalex.org/W3011718251","https://openalex.org/W3047947484","https://openalex.org/W3121569598","https://openalex.org/W3160434771","https://openalex.org/W3169578405","https://openalex.org/W3185152627","https://openalex.org/W4238631751","https://openalex.org/W4248578633","https://openalex.org/W4249103936","https://openalex.org/W4254234063"],"related_works":["https://openalex.org/W4311545198","https://openalex.org/W2038186724","https://openalex.org/W2166828374","https://openalex.org/W2078494689","https://openalex.org/W2357409795","https://openalex.org/W3159974995","https://openalex.org/W2151023556","https://openalex.org/W1509932472","https://openalex.org/W2576355851","https://openalex.org/W2765129600"],"abstract_inverted_index":{"Large-scale":[0],"parallel":[1],"file":[2],"systems":[3,31],"(PFSs)":[4],"play":[5],"an":[6],"essential":[7],"role":[8],"in":[9,48,78,113,124,180,246,263,289,292,303,322],"high-performance":[10,331],"computing":[11],"(HPC).":[12],"However,":[13],"despite":[14],"their":[15,17],"importance,":[16],"reliability":[18],"is":[19,105],"much":[20],"less":[21],"studied":[22],"or":[23,32,208,236],"understood":[24],"compared":[25],"with":[26],"that":[27,203],"of":[28,69,76,90,120,131,167,182,222,258,301,305],"local":[29],"storage":[30,34,122],"cloud":[33],"systems.":[35],"Recent":[36],"failure":[37,71,85,118,162,183,306],"incidents":[38],"at":[39],"real":[40],"HPC":[41],"centers":[42],"have":[43,242],"exposed":[44],"the":[45,54,63,70,84,91,117,125,138,160,168,176,219,224,255,259,277,285,290,296,313,323],"latent":[46],"defects":[47],"PFS":[49,126,139],"clusters":[50],"as":[51,53],"well":[52],"urgent":[55],"need":[56],"for":[57,329],"a":[58,67,96,129,189,214,269],"systematic":[59],"analysis.":[60],"To":[61],"address":[62],"challenge,":[64],"we":[65,94,145,201,283],"perform":[66],"study":[68,149,311],"recovery":[72,86,163,190,220,250],"and":[73,87,109,135,155,164,171,196,248,294,299,312,316,325],"logging":[74,88,165],"mechanisms":[75],"PFSs":[77,108,170,177,302,328],"this":[79,310],"article.":[80],"First,":[81],"to":[82,107,111,148,194,228,268,273],"trigger":[83,209],"operations":[89],"target":[92,169],"PFS,":[93],"introduce":[95],"black-box":[97],"fault":[98,133,142],"injection":[99],"tool":[100,315],"called":[101,192],"PFault":[102,115,147],",":[103],"which":[104,265],"transparent":[106],"easy":[110],"deploy":[112],"practice.":[114],"emulates":[116],"state":[119],"individual":[121],"nodes":[123],"based":[127],"on":[128],"set":[130,272],"pre-defined":[132],"models":[134],"enables":[136],"examining":[137],"behavior":[140],"under":[141],"systematically.":[143],"Next,":[144],"apply":[146],"two":[150],"widely":[151],"used":[152],"PFSs:":[153],"Lustre":[154,187,229,279],"BeeGFS.":[156],"Our":[157],"analysis":[158],"reveals":[159],"unique":[161,297],"patterns":[166,298],"identifies":[172],"multiple":[173],"cases":[174],"where":[175],"are":[178],"imperfect":[179],"terms":[181,304],"handling.":[184],"For":[185],"example,":[186],"includes":[188],"component":[191,251],"LFSCK":[193,204],"detect":[195],"fix":[197],"PFS-level":[198],"inconsistencies,":[199],"but":[200],"find":[202],"itself":[205],"may":[206,230],"hang":[207,235],"kernel":[210],"panics":[211],"when":[212],"scanning":[213],"corrupted":[215],"Lustre.":[216],"Even":[217],"after":[218],"attempt":[221],"LFSCK,":[223],"subsequent":[225],"workloads":[226],"applied":[227],"still":[231],"behave":[232],"abnormally":[233],"(e.g.,":[234],"report":[237],"I/O":[238],"errors).":[239],"Similar":[240],"issues":[241],"also":[243],"been":[244],"observed":[245,262],"BeeGFS":[247],"its":[249],"BeeGFS-FSCK.":[252],"We":[253,308],"analyze":[254],"root":[256],"causes":[257],"abnormal":[260],"symptoms":[261],"depth,":[264],"has":[266],"led":[267],"new":[270],"patch":[271],"be":[274],"merged":[275],"into":[276],"coming":[278],"release.":[280],"In":[281],"addition,":[282],"characterize":[284],"extensive":[286],"logs":[287],"generated":[288],"experiments":[291],"detail":[293],"identify":[295],"limitations":[300],"logging.":[307],"hope":[309],"resulting":[314],"dataset":[317],"can":[318],"facilitate":[319],"follow-up":[320],"research":[321],"communities":[324],"help":[326],"improve":[327],"reliable":[330],"computing.":[332]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
