{"id":"https://openalex.org/W3117311838","doi":"https://doi.org/10.1109/hpec43674.2020.9286139","title":"Identifying Execution Anomalies for Data Intensive Workflows Using Lightweight ML Techniques","display_name":"Identifying Execution Anomalies for Data Intensive Workflows Using Lightweight ML Techniques","publication_year":2020,"publication_date":"2020-09-22","ids":{"openalex":"https://openalex.org/W3117311838","doi":"https://doi.org/10.1109/hpec43674.2020.9286139","mag":"3117311838"},"language":"en","primary_location":{"id":"doi:10.1109/hpec43674.2020.9286139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec43674.2020.9286139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390490","display_name":"Cong Wang","orcid":"https://orcid.org/0000-0002-5300-0122"},"institutions":[{"id":"https://openalex.org/I69048370","display_name":"Renaissance Computing Institute","ror":"https://ror.org/01s91ey96","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535","https://openalex.org/I170897317","https://openalex.org/I69048370"]},{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cong Wang","raw_affiliation_strings":["RENCI, University of North Carolina at Chapel Hill, Chapel Hill, NC"],"affiliations":[{"raw_affiliation_string":"RENCI, University of North Carolina at Chapel Hill, Chapel Hill, NC","institution_ids":["https://openalex.org/I114027177","https://openalex.org/I69048370"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016335226","display_name":"George Papadimitriou","orcid":"https://orcid.org/0000-0001-9384-5034"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Papadimitriou","raw_affiliation_strings":["Information Sciences Institute, University of Southern California, Marina Del Rey, CA"],"affiliations":[{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, Marina Del Rey, CA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111878009","display_name":"Mariam Kiran","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mariam Kiran","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063480283","display_name":"Anirban Mandal","orcid":"https://orcid.org/0000-0001-5145-8618"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I69048370","display_name":"Renaissance Computing Institute","ror":"https://ror.org/01s91ey96","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535","https://openalex.org/I170897317","https://openalex.org/I69048370"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirban Mandal","raw_affiliation_strings":["RENCI, University of North Carolina at Chapel Hill, Chapel Hill, NC"],"affiliations":[{"raw_affiliation_string":"RENCI, University of North Carolina at Chapel Hill, Chapel Hill, NC","institution_ids":["https://openalex.org/I114027177","https://openalex.org/I69048370"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031941569","display_name":"Ewa Deelman","orcid":"https://orcid.org/0000-0001-5106-503X"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ewa Deelman","raw_affiliation_strings":["Information Sciences Institute, University of Southern California, Marina Del Rey, CA"],"affiliations":[{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, Marina Del Rey, CA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100390490"],"corresponding_institution_ids":["https://openalex.org/I114027177","https://openalex.org/I69048370"],"apc_list":null,"apc_paid":null,"fwci":1.2469,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.89145268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.8760473728179932},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8179662823677063},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6089215874671936},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6041297912597656},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5623588562011719},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.5212598443031311},{"id":"https://openalex.org/keywords/workflow-management-system","display_name":"Workflow management system","score":0.5178421139717102},{"id":"https://openalex.org/keywords/workflow-technology","display_name":"Workflow technology","score":0.41644757986068726},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4116175174713135},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4007711112499237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3748166561126709},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15800997614860535}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.8760473728179932},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8179662823677063},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6089215874671936},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6041297912597656},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5623588562011719},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.5212598443031311},{"id":"https://openalex.org/C140824633","wikidata":"https://www.wikidata.org/wiki/Q2808660","display_name":"Workflow management system","level":3,"score":0.5178421139717102},{"id":"https://openalex.org/C19612761","wikidata":"https://www.wikidata.org/wiki/Q8034836","display_name":"Workflow technology","level":3,"score":0.41644757986068726},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4116175174713135},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4007711112499237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3748166561126709},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15800997614860535},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec43674.2020.9286139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec43674.2020.9286139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G759740191","display_name":null,"funder_award_id":"DE-SC0012636M","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W189242244","https://openalex.org/W790410098","https://openalex.org/W1613448136","https://openalex.org/W1985471550","https://openalex.org/W1985847831","https://openalex.org/W2002287579","https://openalex.org/W2019014808","https://openalex.org/W2031846435","https://openalex.org/W2078962046","https://openalex.org/W2125283600","https://openalex.org/W2134089414","https://openalex.org/W2144034089","https://openalex.org/W2153919695","https://openalex.org/W2296719434","https://openalex.org/W2519835867","https://openalex.org/W2766766128","https://openalex.org/W2947593158","https://openalex.org/W2952014779","https://openalex.org/W4248220371","https://openalex.org/W6679849079"],"related_works":["https://openalex.org/W2358008699","https://openalex.org/W2746179582","https://openalex.org/W1987942303","https://openalex.org/W2050637807","https://openalex.org/W3097869156","https://openalex.org/W2359571229","https://openalex.org/W2143887500","https://openalex.org/W2184994130","https://openalex.org/W2404607953","https://openalex.org/W2031725441"],"abstract_inverted_index":{"Today's":[0],"computational":[1,39],"science":[2,29,70],"applications":[3],"are":[4,31],"increasingly":[5],"dependent":[6],"on":[7,12,99,110,201],"many":[8],"complex,":[9],"data-intensive":[10],"operations":[11],"distributed":[13,112],"datasets":[14,104],"that":[15,117,190],"originate":[16],"from":[17,106],"a":[18,77,111,139,196],"variety":[19],"of":[20,37,52,79,142,159],"scientific":[21,47,208],"instruments":[22],"and":[23,40,68,87,102,130,151,204],"repositories.":[24],"To":[25],"manage":[26],"this":[27,73],"complexity,":[28],"workflows":[30,53,133,209],"created":[32],"to":[33,65,90],"automate":[34],"the":[35,50,118,157,160,170],"execution":[36],"these":[38,191],"data":[41],"transfer":[42],"tasks,":[43],"which":[44],"significantly":[45],"improves":[46],"productivity.":[48],"As":[49],"scale":[51],"rapidly":[54],"increases,":[55],"detecting":[56],"anomalous":[57,92],"behaviors":[58],"in":[59,185,211],"workflow":[60,93,108,164],"executions":[61,109],"has":[62],"become":[63],"critical":[64],"ensure":[66],"timely":[67],"accurate":[69],"products.":[71],"In":[72],"paper,":[74],"we":[75],"present":[76],"set":[78],"lightweight":[80],"machine":[81],"learning-based":[82],"techniques,":[83],"including":[84],"both":[85,100],"supervised":[86],"unsupervised":[88],"algorithms,":[89],"identify":[91],"behaviors.":[94],"We":[95,188],"perform":[96],"anomaly":[97,165,202],"analysis":[98,120],"workflow-level":[101,119,161],"task-level":[103,168],"collected":[105],"real":[107],"cloud":[113],"testbed.":[114],"Results":[115],"show":[116],"employing":[121],"k-means":[122],"clustering":[123],"can":[124,181,194],"accurately":[125],"cluster":[126],"anomalous,":[127],"i.e.":[128],"failure-prone":[129],"poorly":[131],"performing":[132],"into":[134],"statistically":[135],"similar":[136],"classes":[137],"with":[138],"reasonable":[140],"quality":[141],"clustering,":[143],"achieving":[144],"over":[145],"0.7":[146],"for":[147,163,198,207],"Normalized":[148],"Mutual":[149],"Information":[150],"Completeness":[152],"scores.":[153],"These":[154],"results":[155,193],"affirm":[156],"selection":[158],"features":[162],"analysis.":[166],"For":[167],"analysis,":[169],"Decision":[171],"Tree":[172],"classifier":[173],"achieves":[174],">80%":[175],"accuracy,":[176],"while":[177],"other":[178],"tested":[179],"classifiers":[180],"achieve":[182],">50%":[183],"accuracy":[184],"most":[186],"cases.":[187],"believe":[189],"promising":[192],"be":[195],"foundation":[197],"future":[199],"research":[200],"detection":[203],"failure":[205],"prediction":[206],"running":[210],"production":[212],"environments.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
