{"id":"https://openalex.org/W4402969640","doi":"https://doi.org/10.1145/3677139","title":"Pluto: Sample Selection for Robust Anomaly Detection on Polluted Log Data","display_name":"Pluto: Sample Selection for Robust Anomaly Detection on Polluted Log Data","publication_year":2024,"publication_date":"2024-09-30","ids":{"openalex":"https://openalex.org/W4402969640","doi":"https://doi.org/10.1145/3677139"},"language":"en","primary_location":{"id":"doi:10.1145/3677139","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3677139","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101443114","display_name":"Lei Ma","orcid":"https://orcid.org/0000-0002-9252-2492"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lei Ma","raw_affiliation_strings":["Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049926126","display_name":"Lei Cao","orcid":"https://orcid.org/0000-0001-9909-8607"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lei Cao","raw_affiliation_strings":["University of Arizona, Tucson, AZ, USA"],"affiliations":[{"raw_affiliation_string":"University of Arizona, Tucson, AZ, USA","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062753670","display_name":"Peter M. VanNostrand","orcid":"https://orcid.org/0000-0002-0285-6019"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter M. VanNostrand","raw_affiliation_strings":["Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089949276","display_name":"Dennis M. Hofmann","orcid":"https://orcid.org/0000-0002-8102-3081"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dennis M. Hofmann","raw_affiliation_strings":["Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066307190","display_name":"Yao Su","orcid":"https://orcid.org/0000-0001-9817-660X"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yao Su","raw_affiliation_strings":["Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008269094","display_name":"Elke A. Rundensteiner","orcid":"https://orcid.org/0000-0001-5375-9254"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elke A. Rundensteiner","raw_affiliation_strings":["Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101443114"],"corresponding_institution_ids":["https://openalex.org/I107077323"],"apc_list":null,"apc_paid":null,"fwci":0.3434,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61276268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"2","issue":"4","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9776999950408936,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6598513722419739},{"id":"https://openalex.org/keywords/anomaly","display_name":"Anomaly (physics)","score":0.5913842916488647},{"id":"https://openalex.org/keywords/pluto","display_name":"Pluto","score":0.5546280741691589},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5231022834777832},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.49129432439804077},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.4585722088813782},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4044525623321533},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32992154359817505},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.322112500667572},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2509042024612427},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1915956735610962},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.1397199034690857},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.11161598563194275},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09982338547706604},{"id":"https://openalex.org/keywords/astrobiology","display_name":"Astrobiology","score":0.06323748826980591}],"concepts":[{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6598513722419739},{"id":"https://openalex.org/C12997251","wikidata":"https://www.wikidata.org/wiki/Q567560","display_name":"Anomaly (physics)","level":2,"score":0.5913842916488647},{"id":"https://openalex.org/C2780263841","wikidata":"https://www.wikidata.org/wiki/Q339","display_name":"Pluto","level":2,"score":0.5546280741691589},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5231022834777832},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.49129432439804077},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.4585722088813782},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4044525623321533},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32992154359817505},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.322112500667572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2509042024612427},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1915956735610962},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.1397199034690857},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.11161598563194275},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09982338547706604},{"id":"https://openalex.org/C87355193","wikidata":"https://www.wikidata.org/wiki/Q411","display_name":"Astrobiology","level":1,"score":0.06323748826980591},{"id":"https://openalex.org/C26873012","wikidata":"https://www.wikidata.org/wiki/Q214781","display_name":"Condensed matter physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3677139","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3677139","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4193947557","display_name":null,"funder_award_id":"IIS-1815866,IIS-1910880,CSSI-2103832,CNS-1852498,NRT-HDR-2021871,DBI-2327954","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G7297899346","display_name":null,"funder_award_id":"P200A180088","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1898824936","https://openalex.org/W2001619934","https://openalex.org/W2038819732","https://openalex.org/W2039157918","https://openalex.org/W2051601650","https://openalex.org/W2094924503","https://openalex.org/W2097749765","https://openalex.org/W2122646361","https://openalex.org/W2162895675","https://openalex.org/W2187089797","https://openalex.org/W2321990803","https://openalex.org/W2401686019","https://openalex.org/W2560021099","https://openalex.org/W2583874385","https://openalex.org/W2754665629","https://openalex.org/W2767094836","https://openalex.org/W3042609801","https://openalex.org/W3089028909","https://openalex.org/W3105982656","https://openalex.org/W3129166376","https://openalex.org/W3169066865","https://openalex.org/W3199174176","https://openalex.org/W3202345803","https://openalex.org/W3204785235","https://openalex.org/W3211977733","https://openalex.org/W4226128225","https://openalex.org/W4230472372","https://openalex.org/W4254182148","https://openalex.org/W4281741212","https://openalex.org/W4290927906","https://openalex.org/W4312433903","https://openalex.org/W4376288669","https://openalex.org/W4380433171","https://openalex.org/W6801615203","https://openalex.org/W6839719062"],"related_works":["https://openalex.org/W2806741695","https://openalex.org/W4290647774","https://openalex.org/W3189286258","https://openalex.org/W3207797160","https://openalex.org/W3210364259","https://openalex.org/W4300558037","https://openalex.org/W2912112202","https://openalex.org/W2667207928","https://openalex.org/W4377864969","https://openalex.org/W2972971679"],"abstract_inverted_index":{"Log":[0],"anomaly":[1,23,56,78,226],"detection,":[2],"critical":[3],"in":[4,93,179],"identifying":[5],"system":[6],"failures":[7],"and":[8,43,118,197],"preempting":[9],"security":[10],"breaches,":[11],"finds":[12],"irregular":[13],"patterns":[14],"within":[15],"large":[16],"volumes":[17],"of":[18,68,90,97,246,259,284],"log":[19,22,34,39,55,71,98,234],"data.":[20,35],"Modern":[21],"detectors":[24],"rely":[25],"on":[26,31,132,193,231],"training":[27,203],"deep":[28],"learning":[29],"models":[30],"clean":[32,38,64],"anomaly-free":[33],"However,":[36],"such":[37],"data":[40,73,211],"requires":[41],"expensive":[42],"tedious":[44],"human":[45],"labeling.":[46],"In":[47],"this":[48,285],"paper,":[49],"we":[50,148],"thus":[51],"propose":[52],"a":[53,63,76,175,190,198,264],"robust":[54],"detection":[57,79,227],"framework,":[58],"PlutoNOSPACE,":[59],"that":[60,112,121,151,238],"automatically":[61],"selects":[62],"representative":[65],"sample":[66,218,279],"subset":[67,188,199],"the":[69,94,102,108,142,154,162,186,194,207,210,224,247,260,277],"polluted":[70,124,146],"sequence":[72,103],"to":[74,87,116,169,222,243,255,270,276],"train":[75],"Transformer-based":[77],"model.":[80,228],"Pluto":[81,100,262],"features":[82],"three":[83],"innovations.":[84],"First,":[85],"due":[86],"localized":[88],"concentrations":[89],"anomalies":[91],"inherent":[92],"embedding":[95,104],"space":[96,105],"data,":[99],"partitions":[101],"generated":[106,205],"by":[107,125,182,206,239],"model":[109,191],"into":[110,161],"regions":[111,120],"then":[113],"allow":[114],"it":[115],"identify":[117],"discard":[119],"are":[122],"highly":[123],"our":[126,133],"pollution":[127,134],"level":[128],"estimation":[129],"scheme,":[130],"based":[131],"quantification":[135],"via":[136],"Gaussian":[137],"mixture":[138],"modeling.":[139],"Second,":[140],"for":[141],"remaining":[143],"more":[144],"slightly":[145],"regions,":[147],"select":[149],"samples":[150],"maximally":[152],"purify":[153],"eigenvector":[155],"spectrum,":[156],"which":[157],"can":[158],"be":[159],"transformed":[160],"NP-hard":[163],"facility":[164],"location":[165],"problem;":[166],"allowing":[167],"us":[168],"leverage":[170],"its":[171],"greedy":[172],"solution":[173],"with":[174],"(1-(1/e))":[176],"approximation":[177],"guarantee":[178],"optimality.":[180],"Third,":[181],"iteratively":[183],"alternating":[184],"between":[185],"above":[187],"selection,":[189],"re-training":[192],"latest":[195,208],"subset,":[196],"filtering":[200],"using":[201],"dynamic":[202],"artifacts":[204],"model,":[209],"selected":[212],"is":[213,220,287],"progressively":[214],"refined.":[215],"The":[216,282],"final":[217,225],"set":[219],"used":[221],"retrain":[223],"Our":[229],"experiments":[230],"four":[232],"real-world":[233],"benchmark":[235],"datasets":[236],"demonstrate":[237],"retaining":[240],"77.7%":[241],"(BGL)":[242,254],"96.6%":[244],"(ThunderBird)":[245],"normal":[248],"sequences":[249],"while":[250],"effectively":[251],"removing":[252],"90.3%":[253],"100.0%":[256],"(ThunderBird,":[257],"HDFS)":[258],"anomalies,":[261],"provides":[263],"significant":[265],"absolute":[266],"F-1":[267],"improvement":[268],"up":[269],"68.86%":[271],"(2.16%":[272],"\u2192":[273],"71.02%)":[274],"compared":[275],"state-of-the-art":[278],"selection":[280],"methods.":[281],"implementation":[283],"work":[286],"available":[288],"at":[289],"https://github.com/LeiMa0324/Pluto-SIGMOD25.":[290]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
