{"id":"https://openalex.org/W1577536172","doi":"https://doi.org/10.1109/hpcsim.2015.7237083","title":"Identifying patterns towards Algorithm Based Fault Tolerance","display_name":"Identifying patterns towards Algorithm Based Fault Tolerance","publication_year":2015,"publication_date":"2015-07-01","ids":{"openalex":"https://openalex.org/W1577536172","doi":"https://doi.org/10.1109/hpcsim.2015.7237083","mag":"1577536172"},"language":"en","primary_location":{"id":"doi:10.1109/hpcsim.2015.7237083","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2015.7237083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072292317","display_name":"Upama Kabir","orcid":null},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Upama Kabir","raw_affiliation_strings":["Department of Computer Science and Software Engineering, Concordia University, Montreal, Canada","Department of Computer Science and Software Engineering, Concordia University, Montr\u00e9al, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Concordia University, Montreal, Canada","institution_ids":["https://openalex.org/I60158472"]},{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Concordia University, Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I60158472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111411144","display_name":"Dhrubajyoti Goswami","orcid":null},"institutions":[{"id":"https://openalex.org/I60158472","display_name":"Concordia University","ror":"https://ror.org/0420zvk78","country_code":"CA","type":"education","lineage":["https://openalex.org/I60158472"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Dhrubajyoti Goswami","raw_affiliation_strings":["Department of Computer Science and Software Engineering, Concordia University, Montreal, Canada","Department of Computer Science and Software Engineering, Concordia University, Montr\u00e9al, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Concordia University, Montreal, Canada","institution_ids":["https://openalex.org/I60158472"]},{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Concordia University, Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I60158472"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5072292317"],"corresponding_institution_ids":["https://openalex.org/I60158472"],"apc_list":null,"apc_paid":null,"fwci":0.9985,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.78442638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"508","last_page":"516"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.860286295413971},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170574307441711},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.7217707633972168},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.5954580903053284},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5601682662963867},{"id":"https://openalex.org/keywords/file-transfer-protocol","display_name":"File Transfer Protocol","score":0.44884949922561646},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4316590428352356},{"id":"https://openalex.org/keywords/software-fault-tolerance","display_name":"Software fault tolerance","score":0.4188176393508911},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38799193501472473},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16071265935897827},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.09254026412963867},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08051902055740356}],"concepts":[{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.860286295413971},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170574307441711},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.7217707633972168},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.5954580903053284},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5601682662963867},{"id":"https://openalex.org/C169485995","wikidata":"https://www.wikidata.org/wiki/Q42283","display_name":"File Transfer Protocol","level":3,"score":0.44884949922561646},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4316590428352356},{"id":"https://openalex.org/C50712370","wikidata":"https://www.wikidata.org/wiki/Q4269346","display_name":"Software fault tolerance","level":3,"score":0.4188176393508911},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38799193501472473},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16071265935897827},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.09254026412963867},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08051902055740356}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcsim.2015.7237083","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2015.7237083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1489689515","https://openalex.org/W1588745704","https://openalex.org/W1963853421","https://openalex.org/W1972116419","https://openalex.org/W1976488112","https://openalex.org/W1981432246","https://openalex.org/W2001495258","https://openalex.org/W2024966997","https://openalex.org/W2036641664","https://openalex.org/W2037523067","https://openalex.org/W2068701179","https://openalex.org/W2072072075","https://openalex.org/W2081409107","https://openalex.org/W2083606889","https://openalex.org/W2083613288","https://openalex.org/W2095487435","https://openalex.org/W2096504919","https://openalex.org/W2100970777","https://openalex.org/W2101077206","https://openalex.org/W2105524676","https://openalex.org/W2106753104","https://openalex.org/W2118467295","https://openalex.org/W2122410182","https://openalex.org/W2123500455","https://openalex.org/W2128854702","https://openalex.org/W2130362816","https://openalex.org/W2144992640","https://openalex.org/W2145670264","https://openalex.org/W2151272421","https://openalex.org/W2156514327","https://openalex.org/W3006153736","https://openalex.org/W3141239549","https://openalex.org/W3150262005","https://openalex.org/W6635445207","https://openalex.org/W6682787132"],"related_works":["https://openalex.org/W1862835629","https://openalex.org/W2136799148","https://openalex.org/W2971479921","https://openalex.org/W2897533804","https://openalex.org/W2106348006","https://openalex.org/W2890506991","https://openalex.org/W3145923041","https://openalex.org/W2946906624","https://openalex.org/W841176518","https://openalex.org/W1978919910"],"abstract_inverted_index":{"Checkpoint":[0],"and":[1,45,58,65,94,170],"recovery":[2,35,104,171],"cost":[3],"imposed":[4],"by":[5],"coordinated":[6],"checkpoint/restart":[7],"(CCP/R)":[8],"is":[9,27,78],"a":[10,28,67,79,85,122,140,147],"crucial":[11],"performance":[12,16],"issue":[13],"for":[14,69,84,113],"high":[15],"computing":[17],"(HPC)":[18],"applications.":[19,117],"In":[20,48],"comparison,":[21],"Algorithm":[22],"Based":[23],"Fault":[24],"Tolerance":[25],"(ABFT)":[26],"promising":[29],"fault":[30,81,103,110,123],"tolerance":[31,82,111,124],"method":[32],"with":[33,100,133],"low":[34],"overhead,":[36],"but":[37],"it":[38],"suffers":[39],"from":[40],"inadequacy":[41],"of":[42,56,60,63,87,116,131,150,168,180],"universal":[43],"applicability":[44],"user":[46,184],"non-transparency.":[47],"this":[49,120],"paper":[50],"we":[51],"address":[52],"the":[53,61,101,109,114,129],"overhead":[54],"problem":[55],"CCP/R":[57,165],"some":[59],"limitations":[62],"ABFT,":[64],"propose":[66],"solution":[68,77,161],"ABFT":[70],"based":[71],"on":[72],"algorithmic":[73,92],"patterns.":[74],"The":[75],"proposed":[76,160],"generic":[80,141],"strategy":[83,112,121],"group":[86,115],"applications":[88],"that":[89,158],"exhibit":[90],"similar":[91],"(structural":[93],"behavioral)":[95],"features.":[96],"These":[97],"features":[98],"together":[99],"minimal":[102],"data":[105],"(critical":[106],"data)":[107],"determine":[108],"We":[118,127],"call":[119],"pattern":[125],"(FTP).":[126],"demonstrate":[128],"idea":[130],"FTP":[132,176],"parallel":[134],"iterative":[135],"deepening":[136],"A*":[137],"(PIDA*)":[138],"search,":[139],"search":[142],"algorithm":[143],"used":[144],"to":[145],"solve":[146],"wide":[148],"range":[149],"discrete":[151],"optimization":[152],"problems":[153],"(DOP).":[154],"Theoretical":[155],"analysis":[156],"shows":[157],"our":[159],"performs":[162],"better":[163],"than":[164],"in":[166,178],"terms":[167],"checkpoint":[169],"time":[172],"overhead.":[173],"Furthermore,":[174],"using":[175],"helps":[177],"separation":[179],"concerns,":[181],"which":[182],"facilitates":[183],"transparency.":[185]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
