{"id":"https://openalex.org/W2007077266","doi":"https://doi.org/10.1109/tit.2014.2361055","title":"Consistent Procedures for Cluster Tree Estimation and Pruning","display_name":"Consistent Procedures for Cluster Tree Estimation and Pruning","publication_year":2014,"publication_date":"2014-10-03","ids":{"openalex":"https://openalex.org/W2007077266","doi":"https://doi.org/10.1109/tit.2014.2361055","mag":"2007077266"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2014.2361055","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2014.2361055","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010790447","display_name":"Kamalika Chaudhuri","orcid":"https://orcid.org/0000-0001-9646-7710"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kamalika Chaudhuri","raw_affiliation_strings":["University of California at San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California at San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101707744","display_name":"Sanjoy Dasgupta","orcid":"https://orcid.org/0000-0002-5960-5157"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanjoy Dasgupta","raw_affiliation_strings":["University of California at San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California at San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076130287","display_name":"Samory Kpotufe","orcid":null},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]},{"id":"https://openalex.org/I160992636","display_name":"Toyota Technological Institute at Chicago","ror":"https://ror.org/02sn5gb64","country_code":"US","type":"education","lineage":["https://openalex.org/I160992636"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samory Kpotufe","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA","Toyota Technological Institute at Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]},{"raw_affiliation_string":"Toyota Technological Institute at Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I160992636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083754306","display_name":"Ulrike von Luxburg","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ulrike von Luxburg","raw_affiliation_strings":["University of Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010790447"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":5.9199,"has_fulltext":false,"cited_by_count":55,"citation_normalized_percentile":{"value":0.96115203,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"60","issue":"12","first_page":"7900","last_page":"7912"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.6148209571838379},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.6069533228874207},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.5853632092475891},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5747509598731995},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.5222252607345581},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.5087397694587708},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.49028733372688293},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.4762166440486908},{"id":"https://openalex.org/keywords/single-linkage-clustering","display_name":"Single-linkage clustering","score":0.45540130138397217},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4312435984611511},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.42911723256111145},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.42195558547973633},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.4197956919670105},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3419725298881531},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.26339343190193176},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2429114282131195},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20303791761398315},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.19797194004058838},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.14199015498161316}],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6148209571838379},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.6069533228874207},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.5853632092475891},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5747509598731995},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.5222252607345581},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.5087397694587708},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.49028733372688293},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.4762166440486908},{"id":"https://openalex.org/C22648726","wikidata":"https://www.wikidata.org/wiki/Q7523744","display_name":"Single-linkage clustering","level":5,"score":0.45540130138397217},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4312435984611511},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.42911723256111145},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.42195558547973633},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.4197956919670105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3419725298881531},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.26339343190193176},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2429114282131195},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20303791761398315},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.19797194004058838},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.14199015498161316},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tit.2014.2361055","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2014.2361055","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3103558160","display_name":null,"funder_award_id":"IIS-1162581","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W140681181","https://openalex.org/W1538452572","https://openalex.org/W1584683589","https://openalex.org/W1656501588","https://openalex.org/W1940950356","https://openalex.org/W1976901474","https://openalex.org/W1977989101","https://openalex.org/W2039344022","https://openalex.org/W2048675583","https://openalex.org/W2053138484","https://openalex.org/W2060918050","https://openalex.org/W2086943813","https://openalex.org/W2096159164","https://openalex.org/W2099111195","https://openalex.org/W2111296615","https://openalex.org/W2117250207","https://openalex.org/W2122218279","https://openalex.org/W2126779282","https://openalex.org/W2141051596","https://openalex.org/W2155170697","https://openalex.org/W2169224215","https://openalex.org/W2181555442","https://openalex.org/W2478708596","https://openalex.org/W2794559960","https://openalex.org/W3106413396","https://openalex.org/W3106467272","https://openalex.org/W6635217366","https://openalex.org/W6636697726","https://openalex.org/W6680594832","https://openalex.org/W6684702370","https://openalex.org/W6685412758"],"related_works":["https://openalex.org/W2488882714","https://openalex.org/W2357582197","https://openalex.org/W2534504385","https://openalex.org/W2318830132","https://openalex.org/W4312222690","https://openalex.org/W1985101567","https://openalex.org/W126354040","https://openalex.org/W2307746788","https://openalex.org/W2035408495","https://openalex.org/W2592952084"],"abstract_inverted_index":{"For":[0],"a":[1,10,36,62,113],"density":[2],"f":[3,20,43,57],"on":[4,77,102],"R":[5],"<sup":[6],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[7],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">d</sup>":[8],",":[9],"high-density":[11,33],"cluster":[12,40,52,107],"is":[13,61,75],"any":[14],"connected":[15],"component":[16],"of":[17,31,42,65,82,106],"{x":[18],":":[19],"(x)":[21],"\u2265":[22],"\u03bb},":[23],"for":[24,49,70,90],"some":[25],"\u03bb":[26],">":[27],"0.":[28],"The":[29,59,73],"set":[30],"all":[32],"clusters":[34,126],"forms":[35],"hierarchy":[37],"called":[38],"the":[39,51,66,78,83,103],"tree":[41,53,108,114],".":[44,58],"We":[45,85],"present":[46],"two":[47],"procedures":[48],"estimating":[50],"given":[54],"samples":[55],"from":[56],"first":[60],"robust":[63],"variant":[64],"single":[67],"linkage":[68],"algorithm":[69],"hierarchical":[71],"clustering.":[72],"second":[74],"based":[76],"k-nearest":[79],"neighbor":[80],"graph":[81],"samples.":[84],"give":[86],"finite-sample":[87],"convergence":[88],"rates":[89],"these":[91],"algorithms,":[92],"which":[93],"also":[94],"imply":[95],"consistency,":[96],"and":[97],"we":[98,111],"derive":[99],"lower":[100],"bounds":[101],"sample":[104],"complexity":[105],"estimation.":[109],"Finally,":[110],"study":[112],"pruning":[115],"procedure":[116],"that":[117,127,133],"guarantees,":[118],"under":[119],"milder":[120],"conditions":[121],"than":[122],"usual,":[123],"to":[124],"remove":[125],"are":[128,134],"spurious":[129],"while":[130],"recovering":[131],"those":[132],"salient.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
