{"id":"https://openalex.org/W4399175225","doi":"https://doi.org/10.1145/3654976","title":"Settling Time vs. Accuracy Tradeoffs for Clustering Big Data","display_name":"Settling Time vs. Accuracy Tradeoffs for Clustering Big Data","publication_year":2024,"publication_date":"2024-05-29","ids":{"openalex":"https://openalex.org/W4399175225","doi":"https://doi.org/10.1145/3654976"},"language":"en","primary_location":{"id":"doi:10.1145/3654976","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3654976","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3654976","source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3654976","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070866752","display_name":"Andrew Draganov","orcid":"https://orcid.org/0000-0002-1617-4166"},"institutions":[{"id":"https://openalex.org/I204337017","display_name":"Aarhus University","ror":"https://ror.org/01aj84f44","country_code":"DK","type":"education","lineage":["https://openalex.org/I204337017"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Andrew Draganov","raw_affiliation_strings":["Aarhus University, Aarhus, DK"],"affiliations":[{"raw_affiliation_string":"Aarhus University, Aarhus, DK","institution_ids":["https://openalex.org/I204337017"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005657476","display_name":"David Saulpic","orcid":"https://orcid.org/0000-0003-4208-8541"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I4210091437","display_name":"Sorbonne Paris Cit\u00e9","ror":"https://ror.org/001z21q04","country_code":"FR","type":"other","lineage":["https://openalex.org/I4210091437"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"David Saulpic","raw_affiliation_strings":["CNRS, CNRS &amp; Universit\u00e9 Paris Cit\u00e9, Paris, FR"],"affiliations":[{"raw_affiliation_string":"CNRS, CNRS &amp; Universit\u00e9 Paris Cit\u00e9, Paris, FR","institution_ids":["https://openalex.org/I204730241","https://openalex.org/I4210091437","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080748807","display_name":"Chris Schwiegelshohn","orcid":"https://orcid.org/0000-0002-1202-0805"},"institutions":[{"id":"https://openalex.org/I204337017","display_name":"Aarhus University","ror":"https://ror.org/01aj84f44","country_code":"DK","type":"education","lineage":["https://openalex.org/I204337017"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Chris Schwiegelshohn","raw_affiliation_strings":["Aarhus University, Aarhus, DK"],"affiliations":[{"raw_affiliation_string":"Aarhus University, Aarhus, DK","institution_ids":["https://openalex.org/I204337017"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5070866752"],"corresponding_institution_ids":["https://openalex.org/I204337017"],"apc_list":null,"apc_paid":null,"fwci":0.3376,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62767667,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"2","issue":"3","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8049982190132141},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7259225249290466},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5104719400405884},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.4858641028404236},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.48553821444511414},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.41949746012687683},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.41074705123901367},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.395111620426178},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36474132537841797},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.19327911734580994},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.17568689584732056}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8049982190132141},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7259225249290466},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5104719400405884},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.4858641028404236},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.48553821444511414},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.41949746012687683},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.41074705123901367},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.395111620426178},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36474132537841797},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.19327911734580994},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.17568689584732056},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3654976","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3654976","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3654976","source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-05148491v1","is_oa":true,"landing_page_url":"https://cnrs.hal.science/hal-05148491","pdf_url":"https://cnrs.hal.science/hal-05148491/document","source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the ACM on Management of Data, 2024, 2 (3), pp.1-25. &#x27E8;10.1145/3654976&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:pure.atira.dk:publications/d51056c9-9fcb-49a8-9564-9fc13c10eca4","is_oa":true,"landing_page_url":"https://pure.au.dk/portal/en/publications/d51056c9-9fcb-49a8-9564-9fc13c10eca4","pdf_url":"https://pure.au.dk/ws/files/451669201/3654976.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Draganov, A A, Saulpic, D & Schwiegelshohn, C 2024, 'Settling Time vs. Accuracy Tradeoffs for Clustering Big Data', Proceedings of the ACM on Management of Data, vol. 2, no. 3, 173. https://doi.org/10.1145/3654976","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/3654976","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3654976","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3654976","source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2689612763","display_name":null,"funder_award_id":"Marie","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4608865072","display_name":null,"funder_award_id":"101034413","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399175225.pdf"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W596522316","https://openalex.org/W1556219185","https://openalex.org/W1981773323","https://openalex.org/W2004931706","https://openalex.org/W2026705124","https://openalex.org/W2029698681","https://openalex.org/W2034616054","https://openalex.org/W2035836267","https://openalex.org/W2045964207","https://openalex.org/W2048442462","https://openalex.org/W2059651397","https://openalex.org/W2059971059","https://openalex.org/W2082353536","https://openalex.org/W2094048240","https://openalex.org/W2095897464","https://openalex.org/W2112796928","https://openalex.org/W2133157266","https://openalex.org/W2220402431","https://openalex.org/W2410099853","https://openalex.org/W2510191442","https://openalex.org/W2739434859","https://openalex.org/W2767385901","https://openalex.org/W2807006342","https://openalex.org/W2951931394","https://openalex.org/W2962777529","https://openalex.org/W3035569828","https://openalex.org/W3116773026","https://openalex.org/W3120740533","https://openalex.org/W3171540622","https://openalex.org/W3183193808","https://openalex.org/W3212356654","https://openalex.org/W3214634542","https://openalex.org/W4242408666","https://openalex.org/W4247692611","https://openalex.org/W4282010992","https://openalex.org/W4306818540","https://openalex.org/W4309868964","https://openalex.org/W4312588364","https://openalex.org/W4313227433","https://openalex.org/W4400399754","https://openalex.org/W4412334830","https://openalex.org/W6729392090","https://openalex.org/W6743933341","https://openalex.org/W6764258251","https://openalex.org/W6846669142","https://openalex.org/W6849969394","https://openalex.org/W6964056453"],"related_works":["https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W1690653314","https://openalex.org/W2275524962","https://openalex.org/W2401723157","https://openalex.org/W2065055572","https://openalex.org/W2784269775"],"abstract_inverted_index":{"We":[0,112],"study":[1],"the":[2,24,30,32,39,43,46,58,75,82,88,109,139,145,164,176,185,199,239],"theoretical":[3,73,216],"and":[4,10,41,70,92,172,178,217,234],"practical":[5,158,218],"runtime":[6],"limits":[7],"of":[8,60,90,138,166,224],"k-means":[9],"k-median":[11],"clustering":[12,19,44,222],"on":[13,45,152],"large":[14],"datasets.":[15],"Since":[16],"effectively":[17,132],"all":[18],"methods":[20],"are":[21,190,207],"slower":[22],"than":[23],"time":[25,69,107,134,140],"it":[26,96,141],"takes":[27,142],"to":[28,36,143,157,162,237],"read":[29,144],"dataset,":[31],"fastest":[33],"approach":[34,148],"is":[35,51,84,229],"quickly":[37],"compress":[38],"data":[40,225],"perform":[42],"compressed":[47],"representation.":[48],"Unfortunately,":[49],"there":[50,120],"no":[52],"universal":[53],"best":[54],"choice":[55],"for":[56,192,220],"compressing":[57],"number":[59],"points":[61,91],"--":[62,135],"while":[63,81],"random":[64],"sampling":[65,130,167,205],"runs":[66],"in":[67,108,131,175,187,201],"sublinear":[68],"coresets":[71,127,189],"provide":[72,213],"guarantees,":[74],"former":[76],"does":[77,121],"not":[78],"enforce":[79],"accuracy":[80],"latter":[83],"too":[85],"slow":[86],"as":[87,196,198],"numbers":[89],"clusters":[93],"grow.":[94],"Indeed,":[95],"has":[97,235],"been":[98],"conjectured":[99],"that":[100,119,125,149],"any":[101],"sensitivity-based":[102],"coreset":[103],"construction":[104],"requires":[105],"super-linear":[106],"datase":[110],"size.":[111,226],"examine":[113],"this":[114,153],"relationship":[115],"by":[116],"first":[117],"showing":[118],"exist":[122],"an":[123],"algorithm":[124],"obtains":[126],"via":[128],"sensitivity":[129],"linear":[133],"within":[136],"log-factors":[137],"data.":[146],"Any":[147],"significantly":[150],"improves":[151],"must":[154],"then":[155],"resort":[156],"heuristics,":[159],"leading":[160],"us":[161],"consider":[163],"spectrum":[165],"strategies":[168,206],"across":[169],"both":[170],"real":[171],"artificial":[173],"datasets":[174],"static":[177],"streaming":[179],"settings.":[180],"Through":[181],"this,":[182],"we":[183,212],"show":[184],"conditions":[186],"which":[188,202],"necessary":[191],"preserving":[193],"cluster":[194],"validity":[195],"well":[197],"settings":[200],"faster,":[203],"cruder":[204],"sufficient.":[208],"As":[209],"a":[210,214],"result,":[211],"comprehensive":[215],"blueprint":[219],"effective":[221],"regardless":[223],"Our":[227],"code":[228],"publicly":[230],"available":[231],"at":[232],"https://github.com/Andrew-Draganov/Fast-Coreset-Generation":[233],"scripts":[236],"recreate":[238],"experiments.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
