{"id":"https://openalex.org/W2783019858","doi":"https://doi.org/10.1109/bigdata.2017.8257998","title":"Distributed Top-N local outlier detection in big data","display_name":"Distributed Top-N local outlier detection in big data","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2783019858","doi":"https://doi.org/10.1109/bigdata.2017.8257998","mag":"2783019858"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8257998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257998","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108577922","display_name":"Yizhou Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yizhou Yan","raw_affiliation_strings":["Computer Science, Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049926126","display_name":"Lei Cao","orcid":"https://orcid.org/0000-0001-9909-8607"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lei Cao","raw_affiliation_strings":["CSAIL, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"CSAIL, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008269094","display_name":"Elke A. Rundensteiner","orcid":"https://orcid.org/0000-0001-5375-9254"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elke A. Rundensteiner","raw_affiliation_strings":["Computer Science, Worcester Polytechnic Institute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Worcester Polytechnic Institute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5108577922"],"corresponding_institution_ids":["https://openalex.org/I107077323"],"apc_list":null,"apc_paid":null,"fwci":1.5602,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87786646,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"827","last_page":"836"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.8262611627578735},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.785291314125061},{"id":"https://openalex.org/keywords/terabyte","display_name":"Terabyte","score":0.7578060626983643},{"id":"https://openalex.org/keywords/local-outlier-factor","display_name":"Local outlier factor","score":0.7511903643608093},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.7055503129959106},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6217595934867859},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5053958892822266},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.46431586146354675},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.45666390657424927},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.4512726664543152},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3280937075614929},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18499431014060974},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.10733485221862793}],"concepts":[{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.8262611627578735},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.785291314125061},{"id":"https://openalex.org/C199683683","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Terabyte","level":2,"score":0.7578060626983643},{"id":"https://openalex.org/C169029474","wikidata":"https://www.wikidata.org/wiki/Q387942","display_name":"Local outlier factor","level":3,"score":0.7511903643608093},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.7055503129959106},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6217595934867859},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5053958892822266},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.46431586146354675},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.45666390657424927},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.4512726664543152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3280937075614929},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18499431014060974},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.10733485221862793},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2017.8257998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257998","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W42722137","https://openalex.org/W1552339598","https://openalex.org/W1568832590","https://openalex.org/W1989750313","https://openalex.org/W1989870481","https://openalex.org/W1996087613","https://openalex.org/W2012122611","https://openalex.org/W2049003051","https://openalex.org/W2049058890","https://openalex.org/W2050439513","https://openalex.org/W2066219179","https://openalex.org/W2129281431","https://openalex.org/W2131975293","https://openalex.org/W2144182447","https://openalex.org/W2154879298","https://openalex.org/W2211491388","https://openalex.org/W2282861635","https://openalex.org/W2743479942","https://openalex.org/W2744680121","https://openalex.org/W3099369182","https://openalex.org/W4254182148","https://openalex.org/W4256141317","https://openalex.org/W6679815717","https://openalex.org/W6742429338"],"related_works":["https://openalex.org/W2936171637","https://openalex.org/W1586214342","https://openalex.org/W2499612753","https://openalex.org/W2260589296","https://openalex.org/W2770832849","https://openalex.org/W3157828377","https://openalex.org/W3111802945","https://openalex.org/W181157820","https://openalex.org/W205872183","https://openalex.org/W2946096271"],"abstract_inverted_index":{"The":[0],"concept":[1],"of":[2,11,47,58,79,118,134,166,170,191],"Top-N":[3,38,80,109,136],"local":[4,39,81],"outlier":[5,82,137],"that":[6,93,100,146,162],"focuses":[7],"on":[8,121,142,182],"the":[9,12,16,45,71,129,135,143,164,171,189,200],"detection":[10,83,138],"N":[13],"points":[14,53,119],"with":[15],"largest":[17],"Local":[18],"Outlier":[19],"Factor":[20],"(LOF)":[21],"score":[22],"has":[23],"been":[24],"shown":[25],"to":[26,75,103,195,205],"be":[27,106,114],"very":[28],"effective":[29],"for":[30,50],"identifying":[31],"outliers":[32,40,110],"in":[33,153,175],"big":[34],"datasets.":[35,177,208],"However,":[36],"detecting":[37],"is":[41],"computationally":[42],"expensive,":[43],"since":[44],"computation":[46],"LOF":[48],"scores":[49],"all":[51],"data":[52],"requires":[54],"a":[55],"huge":[56],"number":[57],"high":[59],"complexity":[60],"k-nearest":[61],"neighbor":[62],"(kNN)":[63],"searches.":[64],"In":[65],"this":[66,77],"work,":[67],"we":[68,157],"thus":[69],"present":[70],"first":[72],"distributed":[73],"solution":[74],"tackle":[76],"problem":[78],"(DTOLF).":[84],"First,":[85],"DTOLF":[86,192],"features":[87],"an":[88],"innovative":[89],"safe":[90,172],"elimination":[91,173],"strategy":[92,174],"efficiently":[94],"identifies":[95],"dually-safe":[96],"points,":[97],"namely":[98],"those":[99],"are":[101,151],"guaranteed":[102],"(1)":[104],"not":[105,113],"classified":[107],"as":[108,116],"and":[111,131,169,185,203],"(2)":[112],"needed":[115],"neighbors":[117],"residing":[120],"other":[122],"machines.":[123],"Therefore,":[124],"it":[125],"effectively":[126],"minimizes":[127],"both":[128],"processing":[130],"communication":[132],"costs":[133],"process.":[139],"Further,":[140],"based":[141],"well-accepted":[144],"observation":[145],"strong":[147],"correlations":[148],"among":[149],"attributes":[150],"prevalent":[152],"real":[154],"world":[155],"datasets,":[156],"propose":[158],"correlation-aware":[159],"optimization":[160],"strategies":[161],"ensure":[163],"effectiveness":[165,190],"grid-based":[167],"partitioning":[168],"multi-dimensional":[176],"Our":[178],"extensive":[179],"experimental":[180],"evaluation":[181],"OpenStreetMap,":[183],"SDSS,":[184],"TIGER":[186],"datasets":[187],"demonstrates":[188],"-":[193],"up":[194],"10":[196],"times":[197],"faster":[198],"than":[199],"alternative":[201],"methods":[202],"scaling":[204],"terabyte":[206],"level":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
