{"id":"https://openalex.org/W4381330367","doi":"https://doi.org/10.1145/3589307","title":"Data Stream Clustering: An In-depth Empirical Study","display_name":"Data Stream Clustering: An In-depth Empirical Study","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4381330367","doi":"https://doi.org/10.1145/3589307"},"language":"en","primary_location":{"id":"doi:10.1145/3589307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589307","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092212618","display_name":"Xin Wang","orcid":"https://orcid.org/0009-0007-6483-9357"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xin Wang","raw_affiliation_strings":["Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005520653","display_name":"Zhengru Wang","orcid":"https://orcid.org/0009-0005-9070-7433"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhengru Wang","raw_affiliation_strings":["Nvidia, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Nvidia, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100773431","display_name":"Zhenyu Wu","orcid":"https://orcid.org/0000-0003-0981-5567"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhenyu Wu","raw_affiliation_strings":["University of Manchester, Manchester, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Manchester, Manchester, United Kingdom","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696504","display_name":"Shuhao Zhang","orcid":"https://orcid.org/0000-0002-9927-6925"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shuhao Zhang","raw_affiliation_strings":["Singapore University of Technology and Design, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design, Singapore, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016486553","display_name":"Xuanhua Shi","orcid":"https://orcid.org/0000-0001-8451-8656"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanhua Shi","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072929639","display_name":"Li Lu","orcid":"https://orcid.org/0000-0001-7904-8821"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Lu","raw_affiliation_strings":["Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24185976"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092212618"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":1.5711,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.86111344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"1","issue":"2","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7847532033920288},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.7353245615959167},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7111057639122009},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.6271414160728455},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.597563624382019},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.5290616750717163},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.5070290565490723},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.44850388169288635},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.4465647339820862},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.4278351068496704},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3273252844810486},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2963274419307709},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.11695170402526855}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7847532033920288},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.7353245615959167},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7111057639122009},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.6271414160728455},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.597563624382019},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.5290616750717163},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.5070290565490723},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.44850388169288635},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.4465647339820862},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.4278351068496704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3273252844810486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2963274419307709},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.11695170402526855},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3589307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589307","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/f5a07cd3-e885-4729-882a-67d1d0c30f41","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/f5a07cd3-e885-4729-882a-67d1d0c30f41","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Wang, X, Wang, Z, Wu, Z, Zhang, S, Shi, X & Lu, L 2023, 'Data Stream Clustering: An In-depth Empirical Study', Proceedings of the ACM on Management of Data, vol. 1, no. 2. https://doi.org/10.1145/3589307","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4099999964237213}],"awards":[{"id":"https://openalex.org/G676909771","display_name":null,"funder_award_id":"2020AAA0108501","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W182707955","https://openalex.org/W1673310716","https://openalex.org/W1975852288","https://openalex.org/W2006685053","https://openalex.org/W2011396747","https://openalex.org/W2027430431","https://openalex.org/W2080330461","https://openalex.org/W2088340225","https://openalex.org/W2092335550","https://openalex.org/W2104564392","https://openalex.org/W2112482089","https://openalex.org/W2130416896","https://openalex.org/W2150593711","https://openalex.org/W2151242668","https://openalex.org/W2168386304","https://openalex.org/W2169581671","https://openalex.org/W2343620495","https://openalex.org/W2434851943","https://openalex.org/W2607665052","https://openalex.org/W2810103615","https://openalex.org/W2898017895","https://openalex.org/W2915063781","https://openalex.org/W2992559729","https://openalex.org/W3003253354","https://openalex.org/W3136012308","https://openalex.org/W4231029117","https://openalex.org/W4239963787","https://openalex.org/W4399637052"],"related_works":["https://openalex.org/W2499612753","https://openalex.org/W3111802945","https://openalex.org/W2946096271","https://openalex.org/W2295423552","https://openalex.org/W3107369729","https://openalex.org/W2998615029","https://openalex.org/W3190734578","https://openalex.org/W1595351371","https://openalex.org/W3121032028","https://openalex.org/W91065195"],"abstract_inverted_index":{"Data":[0],"Stream":[1],"Clustering":[2],"(DSC)":[3],"plays":[4],"an":[5,126],"important":[6],"role":[7],"in":[8,15,76,125,152],"mining":[9],"continuous":[10],"and":[11,32,59,131,137,146,157],"unlabeled":[12],"data":[13,52],"streams":[14],"real-world":[16,81,136],"applications.":[17],"Over":[18],"the":[19,35,77,97,143,169,191],"last":[20],"decades,":[21],"numerous":[22],"DSC":[23,40,108],"algorithms":[24],"have":[25],"been":[26],"proposed":[27],"with":[28,83],"promising":[29],"clustering":[30],"accuracy":[31,156,185],"efficiency.":[33,158],"Despite":[34],"significant":[36],"differences":[37],"among":[38],"existing":[39],"algorithms,":[41],"they":[42],"are":[43],"commonly":[44],"built":[45],"around":[46],"four":[47,111],"key":[48,73,112],"design":[49,74,113,123,150,165],"aspects:":[50],"summarizing":[51],"structure,":[53],"window":[54],"model,":[55],"outlier":[56],"detection":[57],"mechanism,":[58],"offline":[60],"refinement":[61],"strategy.":[62],"However,":[63],"there":[64],"is":[65,90],"a":[66,87,105,172],"lack":[67],"of":[68,107,119,121,148,154,171],"empirical":[69],"studies":[70],"on":[71,109],"these":[72,122],"aspects":[75],"same":[78],"codebase":[79],"using":[80,134],"workloads":[82],"distinct":[84],"characteristics.":[85],"As":[86],"result,":[88],"it":[89],"difficult":[91],"for":[92],"researchers":[93],"to":[94,168,181,190],"improve":[95],"upon":[96],"state-of-the-art.":[98,192],"In":[99],"this":[100],"paper,":[101],"we":[102],"conduct":[103],"such":[104],"study":[106],"its":[110],"aspects.":[114],"We":[115,159],"implemented":[116],"state-of-the-art":[117],"variants":[118],"all":[120],"choices":[124,166],"open-sourced":[127],"platform":[128],"from":[129],"scratch":[130],"evaluated":[132],"them":[133],"both":[135,155],"synthetic":[138],"workloads.":[139],"Our":[140],"analysis":[141],"identifies":[142],"fundamental":[144],"issues":[145],"trade-offs":[147],"each":[149],"choice":[151],"terms":[153],"even":[160],"find":[161],"that":[162],"combining":[163],"flexible":[164],"led":[167],"development":[170],"new":[173],"algorithm":[174],"called":[175],"Benne,":[176],"which":[177],"can":[178],"be":[179],"tuned":[180],"achieve":[182],"either":[183],"better":[184,187],"or":[186],"efficiency":[188],"compared":[189]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
