{"id":"https://openalex.org/W3174526319","doi":"https://doi.org/10.1145/3448016.3457271","title":"On Saving Outliers for Better Clustering over Noisy Data","display_name":"On Saving Outliers for Better Clustering over Noisy Data","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3174526319","doi":"https://doi.org/10.1145/3448016.3457271","mag":"3174526319"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3457271","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3457271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084430029","display_name":"Shaoxu Song","orcid":"https://orcid.org/0000-0002-9503-2755"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shaoxu Song","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101668880","display_name":"Fei Gao","orcid":"https://orcid.org/0000-0002-9779-4817"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Gao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101688217","display_name":"Ruihong Huang","orcid":"https://orcid.org/0000-0002-4572-4243"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruihong Huang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100428932","display_name":"Yihan Wang","orcid":"https://orcid.org/0000-0002-1681-010X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihan Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084430029"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.5439,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.72124964,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1692","last_page":"1704"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11220","display_name":"Water Systems and Optimization","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7709242105484009},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.7359779477119446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7117996215820312},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.49989748001098633},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39390766620635986}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7709242105484009},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.7359779477119446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7117996215820312},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49989748001098633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39390766620635986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3448016.3457271","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3457271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W162338572","https://openalex.org/W1521736627","https://openalex.org/W1532325895","https://openalex.org/W1552339598","https://openalex.org/W1581778302","https://openalex.org/W1592237688","https://openalex.org/W1604958321","https://openalex.org/W1673310716","https://openalex.org/W1966836840","https://openalex.org/W2001496424","https://openalex.org/W2002928429","https://openalex.org/W2024770506","https://openalex.org/W2041442195","https://openalex.org/W2044469685","https://openalex.org/W2047745978","https://openalex.org/W2059009730","https://openalex.org/W2061240327","https://openalex.org/W2074231493","https://openalex.org/W2089206172","https://openalex.org/W2094048240","https://openalex.org/W2109814272","https://openalex.org/W2124278683","https://openalex.org/W2137130182","https://openalex.org/W2153531471","https://openalex.org/W2160642098","https://openalex.org/W2162833336","https://openalex.org/W2164187405","https://openalex.org/W2164790781","https://openalex.org/W2170712852","https://openalex.org/W2395916081","https://openalex.org/W2591700809","https://openalex.org/W2918546371","https://openalex.org/W2935765604","https://openalex.org/W2963602704","https://openalex.org/W3090138545","https://openalex.org/W3147179393","https://openalex.org/W4213009331","https://openalex.org/W4247105055","https://openalex.org/W4247232020","https://openalex.org/W4300417575","https://openalex.org/W4309426526"],"related_works":["https://openalex.org/W3183283580","https://openalex.org/W2337929971","https://openalex.org/W4313069709","https://openalex.org/W4283741549","https://openalex.org/W1590210553","https://openalex.org/W4312609022","https://openalex.org/W2783242366","https://openalex.org/W2575052681","https://openalex.org/W4250175685","https://openalex.org/W2287462975"],"abstract_inverted_index":{"Clustering":[0],"is":[1,55,94,137],"often":[2],"distracted":[3],"by":[4,134],"errors,":[5],"frequently":[6],"observed":[7],"in":[8,19,78,105],"almost":[9],"all":[10],"areas,":[11],"ranging":[12],"from":[13],"online":[14],"questionnaire":[15],"to":[16,72,85,95,107,139,182,200],"sensor":[17],"reading":[18],"IoT.":[20],"The":[21,53,92,131],"dirty":[22],"data":[23,210],"values":[24,99,118],"not":[25],"only":[26],"make":[27,108],"themselves":[28],"(the":[29],"corresponding":[30],"tuples)":[31],"outlying,":[32,122],"but":[33],"also":[34],"mislead":[35],"the":[36,50,57,64,73,87,97,103,114,157,170,183,193,201,208],"clustering":[37,59,143],"of":[38,102,156,169,196],"remaining":[39],"tuples,":[40],"e.g.,":[41,77],"mistakenly":[42],"splitting":[43],"a":[44],"cluster":[45,51],"into":[46],"two":[47],"or":[48,69,147],"distorting":[49],"center.":[52],"reason":[54],"that":[56,207],"traditional":[58],"methods":[60,144],"either":[61],"simply":[62],"ignore":[63],"outliers":[65,88,191],"such":[66,223],"as":[67,218,220,224],"DBSCAN":[68,146],"assign":[70],"them":[71],"closest":[74],"clusters":[75],"anyway,":[76],"K-Means.":[79],"In":[80],"this":[81],"paper,":[82],"we":[83,205],"propose":[84],"save":[86],"for":[89,161],"better":[90],"clustering.":[91],"idea":[93],"adjust":[96],"erroneous":[98],"(often":[100],"minimally)":[101],"outlier":[104,132,158,212],"order":[106],"it":[109],"appear":[110],"normally.":[111],"That":[112],"is,":[113],"tuples":[115],"after":[116],"adjusting":[117],"are":[119],"no":[120],"longer":[121],"and":[123,166,173,226],"thus":[124],"will":[125],"be":[126],"clustered":[127],"without":[128],"distracting":[129],"others.":[130],"saving":[133,159,213],"value":[135],"adjustment":[136],"designed":[138],"work":[140],"with":[141,178,189,211],"any":[142],"(e.g.,":[145],"K-Means).":[148],"Our":[149],"technical":[150],"contributions":[151],"include:":[152],"(1)":[153],"showing":[154],"NPhardness":[155],"problem":[160],"clustering,":[162,217],"(2)":[163],"deriving":[164],"lower":[165],"upper":[167],"bounds":[168],"optimal":[171],"solutions,":[172],"(3)":[174],"devising":[175],"approximation":[176],"algorithm":[177],"performance":[179],"guarantees":[180],"referring":[181],"aforesaid":[184],"bounds.":[185],"Experiments":[186],"on":[187],"datasets":[188],"real-world":[190],"demonstrate":[192],"higher":[194],"accuracy":[195],"our":[197],"proposal,":[198],"compared":[199],"state-of-the-art":[202],"approaches.":[203],"Remarkably,":[204],"show":[206],"adjusted":[209],"indeed":[214],"improve":[215],"significantly":[216],"well":[219],"other":[221],"applications":[222],"classification":[225],"record":[227],"matching.":[228]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
