{"id":"https://openalex.org/W6930691535","doi":"https://doi.org/10.5281/zenodo.15530593","title":"FastLloyd Clustering Datasets","display_name":"FastLloyd Clustering Datasets","publication_year":2025,"publication_date":"2025-05-27","ids":{"openalex":"https://openalex.org/W6930691535","doi":"https://doi.org/10.5281/zenodo.15530593"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.15530593","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15530593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.15530593","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Diaa, Abdulrahman","orcid":"https://orcid.org/0009-0000-0377-3680"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Diaa, Abdulrahman","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Humphries, Thomas","orcid":"https://orcid.org/0000-0003-0844-6771"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Humphries, Thomas","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":null,"display_name":"Kerschbaum, Florian","orcid":"https://orcid.org/0000-0003-4288-2286"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Kerschbaum, Florian","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8307999968528748},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7795000076293945},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5232999920845032},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.44940000772476196},{"id":"https://openalex.org/keywords/clustering-high-dimensional-data","display_name":"Clustering high-dimensional data","score":0.40389999747276306},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4032000005245209},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.37950000166893005},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.373199999332428},{"id":"https://openalex.org/keywords/medoid","display_name":"Medoid","score":0.3476000130176544}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8307999968528748},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7795000076293945},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6729999780654907},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5888000130653381},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5232999920845032},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.44940000772476196},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42500001192092896},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.40389999747276306},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C63085389","wikidata":"https://www.wikidata.org/wiki/Q4287912","display_name":"Medoid","level":3,"score":0.3476000130176544},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C111442797","wikidata":"https://www.wikidata.org/wiki/Q7291446","display_name":"Rand index","level":3,"score":0.3231000006198883},{"id":"https://openalex.org/C22648726","wikidata":"https://www.wikidata.org/wiki/Q7523744","display_name":"Single-linkage clustering","level":5,"score":0.3206000030040741},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.3181999921798706},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.3125999867916107},{"id":"https://openalex.org/C207968372","wikidata":"https://www.wikidata.org/wiki/Q310401","display_name":"k-means clustering","level":3,"score":0.31139999628067017},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.28439998626708984},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.2720000147819519},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.25529998540878296},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2524999976158142},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.15530593","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15530593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.15530593","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15530593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"artifact":[1],"bundles":[2],"the":[3,16,29,42,50,58,65,81,97,107,171,292,303,311,324,349,366,397,403,473,494],"five":[4],"dataset":[5,157,173,215,229,284],"archives":[6],"used":[7],"in":[8,28,354],"our":[9],"private":[10],"federated":[11],"clustering":[12,36,175,202,501],"evaluation,":[13],"corresponding":[14],"to":[15,54,360,472,492],"real-world":[17,134],"benchmarks,":[18],"scaling":[19,296,474],"experiments,":[20,297],"ablation":[21,72,370],"studies,":[22,371],"and":[23,41,104,110,137,347],"timing":[24,452,480],"performance":[25],"tests":[26],"described":[27],"paper.":[30],"The":[31],"real_datasets.tar.xz":[32,131],"includes":[33,106],"ten":[34,133],"established":[35],"benchmarks":[37],"drawn":[38],"from":[39,96,187,402],"UCI":[40,155,211,227],"Clustering":[43,98],"basic":[44,99],"benchmark":[45,100,135,186],"(DOI:":[46,101],"https://doi.org/10.1007/s10489-018-1238-7);":[47],"scale_datasets.tar.xz":[48,290],"contains":[49,505],"SynthNew":[51],"family":[52],"generated":[53,322,391],"assess":[55],"scalability":[56],"via":[57,392],"R":[59,325],"clusterGeneration":[60,77,326],"package":[61,327],";":[62,78,103],"ablate_datasets.tar.xz":[63,364],"holds":[64],"AblateSynth":[66],"sets":[67,400],"varying":[68],"cluster":[69,329,350,387,456],"separation":[70,351,388],"for":[71,119,158,174,201,216,285,295,369,447,498],"analysis":[73,232],"also":[74],"powered":[75],"by":[76,514],"g2_datasets.tar.xz":[79,395],"packages":[80],"G2":[82,398],"sets\u2014Gaussian":[83],"clusters":[84,93,118],"of":[85,170,276,305,343,500],"size":[86],"2048":[87],"across":[88],"dimensions":[89],"2\u20131024":[90],"with":[91,144,230,323,328,454,488,511],"two":[92,444],"each,":[94],"collected":[95],"https://doi.org/10.1007/s10489-018-1238-7)":[102],"timing_datasets.tar.xz":[105,440],"real":[108,445],"s1":[109],"lsun":[111],"datasets":[112,136,446,453],"alongside":[113],"TimeSynth":[114],"files":[115,294,368,496],"(balanced":[116],"synthetic":[117,185,282,399,451],"timing),":[120],"as":[121,139],"per":[122,142,509],"Mohassel":[123,477],"et":[124,478],"al.\u2019s":[125,479],"experimental":[126],"framework":[127],".":[128,177,288,483],"Contents":[129],"1.":[130],"Contains":[132,365],"formatted":[138],"one":[140,506],"sample":[141],"line":[143],"space-separated":[145],"features:":[146],"iris.txt:":[147],"150":[148],"samples,":[149,163,180,193,206,222,236,248,260,277,407],"4":[150],"features,":[151,165,182,195,208,224,238,250,279],"3":[152,166,194,196,209,225],"classes;":[153,240,267],"classic":[154],"Iris":[156],"petal/sepal":[159],"measurements.":[160],"lsun.txt:":[161,443],"400":[162],"2":[164,181,239,278],"clusters;":[167,184,197,210,281],"two-dimensional":[168],"variant":[169],"LSUN":[172],"experiments":[176],"s1.txt:":[178],"5,000":[179],"15":[183],"Fr\u00e4nti\u2019s":[188],"S1":[189],"series.":[190],"house.txt:":[191],"1,837":[192],"housing":[198],"data":[199,507],"transformed":[200],"tasks.":[203],"adult.txt:":[204],"48,842":[205],"6":[207],"Census":[212],"Income":[213],"(\u201cAdult\u201d)":[214],"income":[217],"bracket":[218],"prediction.":[219],"wine.txt:":[220],"178":[221],"13":[223],"cultivars;":[226],"Wine":[228],"chemical":[231],"features.":[233],"breast.txt:":[234],"569":[235],"9":[237],"Wisconsin":[241],"Diagnostic":[242],"Breast":[243],"Cancer":[244],"dataset.":[245],"yeast.txt:":[246],"1,484":[247],"8":[249],"10":[251,265],"localization":[252,256],"sites;":[253],"yeast":[254],"protein":[255],"data.":[257],"mnist.txt:":[258],"10,000":[259],"784":[261],"features":[262,512],"(28\u00d728":[263],"pixels),":[264],"digit":[266],"MNIST":[268],"handwritten":[269],"digits.":[270],"birch2.txt:":[271],"(a":[272],"random)":[273],"25,000/100,000":[274],"subset":[275],"100":[280],"BIRCH2":[283],"high-cluster\u2010count":[286],"evaluation":[287],"2.":[289],"Holds":[291],"SynthNew_{k}_{d}_{s}.txt":[293],"where:":[298],"$k":[299,373,460],"\\in":[300,308,314,374,378,382,413,428,461,464,467],"\\{2,4,8,16,32\\}$":[301],"is":[302,310],"number":[304,339],"clusters,":[306,376,410],"$d":[307,377,412,463],"\\{2,4,8,16,32,64,128,256,512\\}$":[309],"dimensionality,":[312],"$s":[313],"\\{1,2,3\\}$":[315],"are":[316,321],"different":[317],"random":[318,338],"seeds.":[319],"These":[320],"sizes":[330,457],"following":[331,476],"a":[332,337],"$1:2:...:k$":[333],"ratio.":[334],"We":[335],"incorporate":[336],"(in":[340],"$[0,":[341],"100]$)":[342],"randomly":[344,353],"sampled":[345],"outliers":[346],"set":[348],"degrees":[352],"$[0.16,":[355],"0.26]$,":[356],"spanning":[357],"partially":[358],"overlapping":[359],"separated":[361,513],"clusters.":[362],"3.":[363],"AblateSynth_{k}_{d}_{sep}.txt":[367],"with:":[372],"\\{2,4,8,16\\}$":[375,379],"dimensions,":[380],"$sep":[381],"\\{0.25,":[383],"0.5,":[384],"0.75\\}$":[385],"controlling":[386],"degrees.":[389],"Also":[390],"clusterGeneration.":[393],"4.":[394],"Packages":[396],"(g2-{dim}-{var}.txt)":[401],"clustering-data":[404],"benchmarks:":[405],"$N=2048$":[406],"$k=2$":[408],"Gaussian":[409],"Dimensions":[411],"\\{1,":[414],"2,":[415],"4,":[416],"8,":[417],"16,":[418],"32,":[419],"64,":[420],"128,":[421],"256,":[422],"512,":[423],"1024\\}$":[424],"Cluster":[425],"overlap":[426],"$var":[427],"\\{10,":[429],"20,":[430],"30,":[431],"40,":[432],"50,":[433],"60,":[434],"70,":[435],"80,":[436],"90,":[437],"100\\}$":[438],"5.":[439],"Includes:":[441],"s1.txt,":[442],"baseline":[448],"timing.":[449],"timesynth_{k}_{d}_{n}.txt:":[450],"balanced":[455],"C_{avg}=N/K,":[458],"varying:":[459],"\\{2,5\\}$":[462,465],"$N":[466],"\\{10000;":[468],"100000\\}$":[469],"Generated":[470],"similarly":[471],"sets,":[475],"experiment":[481],"protocol":[482],"Usage:":[484],"Unpack":[485],"any":[486],"archive":[487],"tar":[489],"-xJf":[490],"<archive>.tar.xz":[491],"access":[493],".txt":[495],"directly":[497],"replication":[499],"experiments.":[502],"Each":[503],"file":[504],"point":[508],"line,":[510],"spaces.":[515]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
