{"id":"https://openalex.org/W2032840619","doi":"https://doi.org/10.1145/1014052.1016921","title":"Programming the K-means clustering algorithm in SQL","display_name":"Programming the K-means clustering algorithm in SQL","publication_year":2004,"publication_date":"2004-08-22","ids":{"openalex":"https://openalex.org/W2032840619","doi":"https://doi.org/10.1145/1014052.1016921","mag":"2032840619"},"language":"en","primary_location":{"id":"doi:10.1145/1014052.1016921","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1016921","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031610238","display_name":"Carlos Ordo\u0144\u1ebdz","orcid":"https://orcid.org/0009-0005-1135-9726"},"institutions":[{"id":"https://openalex.org/I108650528","display_name":"Teradata (United Kingdom)","ror":"https://ror.org/00gze1672","country_code":"GB","type":"company","lineage":["https://openalex.org/I108650528","https://openalex.org/I4210158714"]},{"id":"https://openalex.org/I4210158714","display_name":"Teradata (United States)","ror":"https://ror.org/04vkkat38","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158714"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Carlos Ordonez","raw_affiliation_strings":["Teradata, NCR, San Diego, CA","Teradata (NCR), San Diego, CA"],"affiliations":[{"raw_affiliation_string":"Teradata, NCR, San Diego, CA","institution_ids":["https://openalex.org/I4210158714"]},{"raw_affiliation_string":"Teradata (NCR), San Diego, CA","institution_ids":["https://openalex.org/I108650528"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5031610238"],"corresponding_institution_ids":["https://openalex.org/I108650528","https://openalex.org/I4210158714"],"apc_list":null,"apc_paid":null,"fwci":1.3864,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.84892247,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"823","last_page":"828"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8341110944747925},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7152851819992065},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.631085991859436},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6280213594436646},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.5729708671569824},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5589482188224792},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.557222306728363},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.44357311725616455},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39221155643463135},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.36557966470718384},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3339805603027344},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.28207123279571533},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.267433226108551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15802228450775146},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14208200573921204}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8341110944747925},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7152851819992065},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.631085991859436},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6280213594436646},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.5729708671569824},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5589482188224792},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.557222306728363},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.44357311725616455},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39221155643463135},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.36557966470718384},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3339805603027344},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.28207123279571533},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.267433226108551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15802228450775146},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14208200573921204}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1014052.1016921","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1016921","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.700.6959","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.700.6959","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www2.cs.uh.edu/%7Eordonez/w-pdf/w-2004-KDD-sqlkm.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W147860157","https://openalex.org/W1996006113","https://openalex.org/W2019916408","https://openalex.org/W2026450968","https://openalex.org/W2054638878","https://openalex.org/W2063680791","https://openalex.org/W2064803206","https://openalex.org/W2095897464","https://openalex.org/W2112210867","https://openalex.org/W2127218421","https://openalex.org/W2143979434","https://openalex.org/W2144405306","https://openalex.org/W2161872843","https://openalex.org/W2206925937","https://openalex.org/W4244268470","https://openalex.org/W4248008732"],"related_works":["https://openalex.org/W411760269","https://openalex.org/W2183916789","https://openalex.org/W2101174895","https://openalex.org/W2491448268","https://openalex.org/W2181939267","https://openalex.org/W2357149509","https://openalex.org/W2363054820","https://openalex.org/W2391008599","https://openalex.org/W2117838073","https://openalex.org/W2603768743"],"abstract_inverted_index":{"Using":[0],"SQL":[1,39],"has":[2],"not":[3],"been":[4],"considered":[5],"an":[6,37],"efficient":[7,38],"and":[8,26,61,79,101,106,109,113,117,121,136,147],"feasible":[9,34],"way":[10],"to":[11,35,73,95,104],"implement":[12],"data":[13,22,98,132,145],"mining":[14],"algorithms.":[15],"Although":[16],"this":[17,29],"is":[18,33,92],"true":[19],"for":[20],"many":[21],"mining,":[23],"machine":[24],"learning":[25],"statistical":[27],"algorithms,":[28],"work":[30,49],"shows":[31],"it":[32,91],"get":[36],"implementation":[40,141],"of":[41,52,67,89,124],"the":[42,69],"well-known":[43],"K-means":[44,140],"clustering":[45,81,119],"algorithm":[46],"that":[47],"can":[48,142],"on":[50],"top":[51],"a":[53,64,86],"relational":[54],"DBMS.":[55],"The":[56,138],"article":[57,70],"emphasizes":[58],"both":[59],"correctness":[60,65],"performance.":[62],"From":[63,85],"point":[66,88],"view":[68,90],"explains":[71],"how":[72,94],"compute":[74],"Euclidean":[75],"distance,":[76],"nearest-cluster":[77],"queries":[78],"updating":[80],"results":[82],"in":[83],"SQL.":[84],"performance":[87],"explained":[93],"cluster":[96,143],"large":[97,144],"sets":[99,133,146],"defining":[100],"indexing":[102],"tables":[103],"store":[105],"retrieve":[107],"intermediate":[108],"final":[110],"results,":[111],"optimizing":[112,116],"avoiding":[114],"joins,":[115],"simplifying":[118],"aggregations,":[120],"taking":[122],"advantage":[123],"sufficient":[125],"statistics.":[126],"Experiments":[127],"evaluate":[128],"scalability":[129],"with":[130],"synthetic":[131],"varying":[134],"size":[135],"dimensionality.":[137],"proposed":[139],"exhibits":[148],"linear":[149],"scalability.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
