{"id":"https://openalex.org/W2898833900","doi":"https://doi.org/10.1609/aaai.v33i01.33015000","title":"On the Persistence of Clustering Solutions and True Number of Clusters in a Dataset","display_name":"On the Persistence of Clustering Solutions and True Number of Clusters in a Dataset","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2898833900","doi":"https://doi.org/10.1609/aaai.v33i01.33015000","mag":"2898833900"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33015000","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33015000","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4431/4309","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4431/4309","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103142073","display_name":"Amber Srivastava","orcid":"https://orcid.org/0000-0002-0568-8276"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amber Srivastava","raw_affiliation_strings":["University of Illinois at Urbana Champaign","***University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana Champaign","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"***University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085311610","display_name":"Mayank Baranwal","orcid":"https://orcid.org/0000-0001-9354-2826"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mayank Baranwal","raw_affiliation_strings":["University of Michigan, Ann Arbor","University of Michigan Ann Arbor"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"University of Michigan Ann Arbor","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051680600","display_name":"Srinivasa M. Salapaka","orcid":"https://orcid.org/0000-0002-3259-6027"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srinivasa Salapaka","raw_affiliation_strings":["University of Illinois at Urbana Champaign","***University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana Champaign","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"***University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103142073"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.2665,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58312236,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"33","issue":"01","first_page":"5000","last_page":"5007"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8593998551368713},{"id":"https://openalex.org/keywords/determining-the-number-of-clusters-in-a-data-set","display_name":"Determining the number of clusters in a data set","score":0.6279984712600708},{"id":"https://openalex.org/keywords/single-linkage-clustering","display_name":"Single-linkage clustering","score":0.5780671834945679},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.5384289026260376},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.5370693802833557},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.5212476849555969},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.4911818206310272},{"id":"https://openalex.org/keywords/k-medians-clustering","display_name":"k-medians clustering","score":0.4824393689632416},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.47337406873703003},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47086989879608154},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4222784638404846},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.39763274788856506},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3488767147064209},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1845947802066803}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8593998551368713},{"id":"https://openalex.org/C149872217","wikidata":"https://www.wikidata.org/wiki/Q5265701","display_name":"Determining the number of clusters in a data set","level":5,"score":0.6279984712600708},{"id":"https://openalex.org/C22648726","wikidata":"https://www.wikidata.org/wiki/Q7523744","display_name":"Single-linkage clustering","level":5,"score":0.5780671834945679},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.5384289026260376},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.5370693802833557},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.5212476849555969},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.4911818206310272},{"id":"https://openalex.org/C115328559","wikidata":"https://www.wikidata.org/wiki/Q4041956","display_name":"k-medians clustering","level":5,"score":0.4824393689632416},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.47337406873703003},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47086989879608154},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4222784638404846},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39763274788856506},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3488767147064209},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1845947802066803},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1609/aaai.v33i01.33015000","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33015000","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4431/4309","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1811.00102","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1811.00102","pdf_url":"https://arxiv.org/pdf/1811.00102","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2898833900","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1811.00102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1811.00102","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1811.00102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33015000","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33015000","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4431/4309","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2898833900.pdf","grobid_xml":"https://content.openalex.org/works/W2898833900.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W101836494","https://openalex.org/W312962489","https://openalex.org/W1515277467","https://openalex.org/W1673075472","https://openalex.org/W1965468691","https://openalex.org/W1973041621","https://openalex.org/W1977556410","https://openalex.org/W1985593448","https://openalex.org/W1986007546","https://openalex.org/W1991645955","https://openalex.org/W1997817740","https://openalex.org/W2018821242","https://openalex.org/W2049633694","https://openalex.org/W2054658115","https://openalex.org/W2069353545","https://openalex.org/W2071949631","https://openalex.org/W2077085434","https://openalex.org/W2123381582","https://openalex.org/W2130473611","https://openalex.org/W2130539320","https://openalex.org/W2137572914","https://openalex.org/W2145721327","https://openalex.org/W2146261447","https://openalex.org/W2149573555","https://openalex.org/W2150753219","https://openalex.org/W2161877964","https://openalex.org/W2165874743","https://openalex.org/W2884586244","https://openalex.org/W3120740533","https://openalex.org/W6608065289","https://openalex.org/W6644682428","https://openalex.org/W6649534527","https://openalex.org/W6655030434","https://openalex.org/W6664471178","https://openalex.org/W6671018158","https://openalex.org/W6679597375","https://openalex.org/W6680590307","https://openalex.org/W6682346614","https://openalex.org/W6684578312","https://openalex.org/W7048738093","https://openalex.org/W7065866364"],"related_works":["https://openalex.org/W2900594807","https://openalex.org/W2965627224","https://openalex.org/W3095433343","https://openalex.org/W2778728785","https://openalex.org/W2585572444","https://openalex.org/W3170057593","https://openalex.org/W2406116879","https://openalex.org/W2560094743","https://openalex.org/W2520151450","https://openalex.org/W2335700973","https://openalex.org/W2287897145","https://openalex.org/W2154837929","https://openalex.org/W2295884465","https://openalex.org/W2298363337","https://openalex.org/W2119276018","https://openalex.org/W3009220910","https://openalex.org/W1975417018","https://openalex.org/W2295256067","https://openalex.org/W2581190813","https://openalex.org/W3181646358"],"abstract_inverted_index":{"Typically":[0],"clustering":[1,4,37,52,75,107,127,190],"algorithms":[2,182],"provide":[3],"solutions":[5,38,53,57,108,128,148,191],"with":[6,39,58,109,149,192,223],"prespecified":[7],"number":[8,20,41,60,111,151,194,216],"of":[9,13,21,42,49,51,61,69,83,88,106,112,152,159,195,205,217],"clusters.":[10,43,62,113,153,196],"The":[11,63],"lack":[12],"a":[14,32,47,74,97,104,123,157],"priori":[15,124],"knowledge":[16],"on":[17,156],"the":[18,25,36,67,84,90,117,126,131,166,171,189,206,210,215],"true":[19,150,193],"underlying":[22],"clusters":[23,121,133],"in":[24,81,103,138,186,203,209],"dataset":[26],"makes":[27],"it":[28,78],"important":[29],"to":[30,34,66,146,225],"have":[31],"metric":[33],"compare":[35],"different":[40,59,110],"This":[44],"article":[45],"quantifies":[46],"notion":[48,142],"persistence":[50,64,98],"that":[54,116,129,165],"enables":[55],"comparing":[56],"relates":[65],"range":[68],"dataresolution":[70],"scales":[71],"over":[72,86],"which":[73],"solution":[76],"persists;":[77],"is":[79],"quantified":[80],"terms":[82,204],"maximum":[85],"two-norms":[87],"all":[89],"associated":[91],"cluster-covariance":[92],"matrices.":[93],"Thus":[94],"we":[95],"associate":[96],"value":[99],"for":[100],"each":[101],"element":[102],"set":[105],"We":[114],"show":[115],"datasets":[118,163],"where":[119,214],"natural":[120,132],"are":[122,134],"known,":[125],"identify":[130,147],"most":[135],"persistent":[136],"-":[137],"this":[139,141],"way,":[140],"can":[143,200],"be":[144,201],"used":[145],"Detailed":[154],"experiments":[155],"variety":[158],"standard":[160],"and":[161,183],"synthetic":[162],"demonstrate":[164],"proposed":[167],"persistence-based":[168],"indicator":[169],"outperforms":[170],"existing":[172],"approaches,":[173],"such":[174],"as,":[175],"gap-statistic":[176],"method,":[177,185],"X-means,":[178],"Gmeans,":[179],"PG-means,":[180],"dip-means":[181],"informationtheoretic":[184],"accurately":[187],"identifying":[188],"Interestingly,":[197],"our":[198],"method":[199],"explained":[202],"phase-transition":[207],"phenomenon":[208],"deterministic":[211],"annealing":[212,227],"algorithm,":[213],"distinct":[218],"cluster":[219],"centers":[220],"changes":[221],"(bifurcates)":[222],"respect":[224],"an":[226],"parameter.":[228]},"counts_by_year":[{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
