{"id":"https://openalex.org/W2020362899","doi":"https://doi.org/10.1145/1150402.1150503","title":"K-means clustering versus validation measures","display_name":"K-means clustering versus validation measures","publication_year":2006,"publication_date":"2006-08-20","ids":{"openalex":"https://openalex.org/W2020362899","doi":"https://doi.org/10.1145/1150402.1150503","mag":"2020362899"},"language":"en","primary_location":{"id":"doi:10.1145/1150402.1150503","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1150402.1150503","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101862104","display_name":"Hui Xiong","orcid":"https://orcid.org/0000-0001-6016-6465"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]},{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL","US"],"is_corresponding":true,"raw_author_name":"Hui Xiong","raw_affiliation_strings":["Rutgers University","Rutgers, University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rutgers University","institution_ids":["https://openalex.org/I4210096112"]},{"raw_affiliation_string":"Rutgers, University","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035293475","display_name":"Junjie Wu","orcid":"https://orcid.org/0000-0001-7650-3657"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Wu","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100756747","display_name":"Jian Chen","orcid":"https://orcid.org/0000-0002-0760-0338"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Chen","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101862104"],"corresponding_institution_ids":["https://openalex.org/I102322142","https://openalex.org/I4210096112"],"apc_list":null,"apc_paid":null,"fwci":5.5884,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.9545234,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"779","last_page":"784"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.9118924140930176},{"id":"https://openalex.org/keywords/k-medians-clustering","display_name":"k-medians clustering","score":0.6733030676841736},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.6555750370025635},{"id":"https://openalex.org/keywords/single-linkage-clustering","display_name":"Single-linkage clustering","score":0.6272838711738586},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.589209794998169},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.556413471698761},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.5508173704147339},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.5482292175292969},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5457427501678467},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.49950361251831055},{"id":"https://openalex.org/keywords/complete-linkage-clustering","display_name":"Complete-linkage clustering","score":0.48290514945983887},{"id":"https://openalex.org/keywords/determining-the-number-of-clusters-in-a-data-set","display_name":"Determining the number of clusters in a data set","score":0.46439605951309204},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.4249145984649658},{"id":"https://openalex.org/keywords/clustering-high-dimensional-data","display_name":"Clustering high-dimensional data","score":0.4243004024028778},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3944512903690338},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33194297552108765},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.307068407535553},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11464551091194153}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.9118924140930176},{"id":"https://openalex.org/C115328559","wikidata":"https://www.wikidata.org/wiki/Q4041956","display_name":"k-medians clustering","level":5,"score":0.6733030676841736},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.6555750370025635},{"id":"https://openalex.org/C22648726","wikidata":"https://www.wikidata.org/wiki/Q7523744","display_name":"Single-linkage clustering","level":5,"score":0.6272838711738586},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.589209794998169},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.556413471698761},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.5508173704147339},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.5482292175292969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5457427501678467},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.49950361251831055},{"id":"https://openalex.org/C23822008","wikidata":"https://www.wikidata.org/wiki/Q5156437","display_name":"Complete-linkage clustering","level":5,"score":0.48290514945983887},{"id":"https://openalex.org/C149872217","wikidata":"https://www.wikidata.org/wiki/Q5265701","display_name":"Determining the number of clusters in a data set","level":5,"score":0.46439605951309204},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.4249145984649658},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.4243004024028778},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3944512903690338},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33194297552108765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.307068407535553},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11464551091194153},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1150402.1150503","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1150402.1150503","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W9613553","https://openalex.org/W147860157","https://openalex.org/W1488289178","https://openalex.org/W1565377632","https://openalex.org/W1651093245","https://openalex.org/W1660264423","https://openalex.org/W1673310716","https://openalex.org/W1760551737","https://openalex.org/W1938740620","https://openalex.org/W1971784203","https://openalex.org/W2036868373","https://openalex.org/W2061240327","https://openalex.org/W2070412788","https://openalex.org/W2071664212","https://openalex.org/W2084812512","https://openalex.org/W2087962968","https://openalex.org/W2089923519","https://openalex.org/W2095897464","https://openalex.org/W2097253747","https://openalex.org/W2097413644","https://openalex.org/W2098162425","https://openalex.org/W2103868202","https://openalex.org/W2122943553","https://openalex.org/W2123386615","https://openalex.org/W2123497041","https://openalex.org/W2127218421","https://openalex.org/W2131687179","https://openalex.org/W2136663225","https://openalex.org/W2138218344","https://openalex.org/W2141585940","https://openalex.org/W2144182447","https://openalex.org/W2146483359","https://openalex.org/W2149148023","https://openalex.org/W2157665255","https://openalex.org/W2158312857","https://openalex.org/W2168667497","https://openalex.org/W2171975443","https://openalex.org/W2482589566","https://openalex.org/W2753755000","https://openalex.org/W3099514962","https://openalex.org/W3099839986","https://openalex.org/W3139328003","https://openalex.org/W4285719527","https://openalex.org/W6633894697","https://openalex.org/W6637231022","https://openalex.org/W6674809819","https://openalex.org/W6770641979","https://openalex.org/W7052201853"],"related_works":["https://openalex.org/W2229552629","https://openalex.org/W2188840951","https://openalex.org/W2263992973","https://openalex.org/W1481928625","https://openalex.org/W2338192130","https://openalex.org/W2308893070","https://openalex.org/W4232307982","https://openalex.org/W3092684173","https://openalex.org/W2141140100","https://openalex.org/W2213356533"],"abstract_inverted_index":{"K-means":[0,20,42,51,71,82,102,153],"is":[1,24,74,86,174],"a":[2,176],"widely":[3],"used":[4],"partitional":[5],"clustering":[6,52,91,127,135],"method.":[7],"While":[8],"there":[9],"are":[10],"considerable":[11],"research":[12],"efforts":[13],"to":[14,26,104,139,155,182],"characterize":[15],"the":[16,31,36,39,50,60,68,75,87,90,106,109,114,122,131,134,144,157,161,164,170],"key":[17],"features":[18],"of":[19,41,70,89,163,172],"clustering,":[21],"further":[22],"investigation":[23],"needed":[25],"reveal":[27],"whether":[28],"and":[29],"how":[30,59],"data":[32],"distributions":[33],"can":[34,64],"have":[35],"impact":[37,66],"on":[38,67,113,133,143],"performance":[40,69],"clustering.":[43],"Indeed,":[44],"in":[45,159,175],"this":[46],"paper,":[47],"we":[48,98,119],"revisit":[49],"problem":[53],"by":[54,93,169],"answering":[55],"three":[56],"questions.":[57],"First,":[58],"\"true\"":[61],"cluster":[62,115,145,165],"sizes":[63],"make":[65],"clustering?":[72,83],"Second,":[73],"entropy":[76,123],"an":[77,125],"algorithm-independent":[78],"validation":[79,128],"measure":[80],"for":[81],"Finally,":[84,147],"what":[85],"distribution":[88,112],"results":[92,150],"K-means?":[94],"To":[95],"that":[96,101,121,152],"end,":[97],"first":[99],"illustrate":[100],"tends":[103,154],"generate":[105],"clusters":[107,158],"with":[108],"relatively":[110],"uniform":[111],"sizes.":[116,146],"In":[117],"addition,":[118],"show":[120],"measure,":[124,129],"external":[126],"has":[130],"favorite":[132],"algorithms":[136],"which":[137,160],"tend":[138],"reduce":[140],"high":[141],"variation":[142,162],"our":[148],"experimental":[149],"indicate":[151],"produce":[156],"sizes,":[166],"as":[167],"measured":[168],"Coefficient":[171],"Variation(CV),":[173],"specific":[177],"range,":[178],"approximately":[179],"from":[180],"0.3":[181],"1.0.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-26T08:31:28.666265","created_date":"2025-10-10T00:00:00"}
