{"id":"https://openalex.org/W2034616054","doi":"https://doi.org/10.1145/2020408.2020515","title":"Fast clustering using MapReduce","display_name":"Fast clustering using MapReduce","publication_year":2011,"publication_date":"2011-08-21","ids":{"openalex":"https://openalex.org/W2034616054","doi":"https://doi.org/10.1145/2020408.2020515","mag":"2034616054"},"language":"en","primary_location":{"id":"doi:10.1145/2020408.2020515","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2020408.2020515","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025907388","display_name":"Alina Ene","orcid":"https://orcid.org/0000-0002-5818-1807"},"institutions":[{"id":"https://openalex.org/I2801919071","display_name":"University of Illinois System","ror":"https://ror.org/05e94g991","country_code":"US","type":"education","lineage":["https://openalex.org/I2801919071"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alina Ene","raw_affiliation_strings":["University of Illinois, Urbana, IL, USA","University of Illinois Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]},{"raw_affiliation_string":"University of Illinois Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006392819","display_name":"Sungjin Im","orcid":"https://orcid.org/0000-0001-5994-7280"},"institutions":[{"id":"https://openalex.org/I2801919071","display_name":"University of Illinois System","ror":"https://ror.org/05e94g991","country_code":"US","type":"education","lineage":["https://openalex.org/I2801919071"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sungjin Im","raw_affiliation_strings":["University of Illinois, Urbana, IL, USA","University of Illinois Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]},{"raw_affiliation_string":"University of Illinois Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044201440","display_name":"Benjamin Moseley","orcid":"https://orcid.org/0000-0001-8162-017X"},"institutions":[{"id":"https://openalex.org/I2801919071","display_name":"University of Illinois System","ror":"https://ror.org/05e94g991","country_code":"US","type":"education","lineage":["https://openalex.org/I2801919071"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin Moseley","raw_affiliation_strings":["University of Illinois, Urbana, IL, USA","University of Illinois Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]},{"raw_affiliation_string":"University of Illinois Urbana, IL, USA","institution_ids":["https://openalex.org/I2801919071"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":18.5802,"has_fulltext":false,"cited_by_count":215,"citation_normalized_percentile":{"value":0.99517486,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"681","last_page":"689"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8602173328399658},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8156736493110657},{"id":"https://openalex.org/keywords/canopy-clustering-algorithm","display_name":"Canopy clustering algorithm","score":0.5511233806610107},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.5377827286720276},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.5059065222740173},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.48115503787994385},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46108290553092957},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4590763747692108},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.44348061084747314},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4379216432571411},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.4298776090145111},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.42316773533821106},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.39120832085609436},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2761000394821167},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2301836907863617},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12243074178695679}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8602173328399658},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8156736493110657},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.5511233806610107},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.5377827286720276},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.5059065222740173},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.48115503787994385},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46108290553092957},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4590763747692108},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.44348061084747314},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4379216432571411},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.4298776090145111},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.42316773533821106},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39120832085609436},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2761000394821167},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2301836907863617},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12243074178695679},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2020408.2020515","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2020408.2020515","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W157682127","https://openalex.org/W1529111168","https://openalex.org/W1598064945","https://openalex.org/W1854155592","https://openalex.org/W1973264045","https://openalex.org/W1987047329","https://openalex.org/W2003207175","https://openalex.org/W2006514056","https://openalex.org/W2012929417","https://openalex.org/W2013089587","https://openalex.org/W2016332468","https://openalex.org/W2016973429","https://openalex.org/W2028761382","https://openalex.org/W2041571997","https://openalex.org/W2048974928","https://openalex.org/W2051586153","https://openalex.org/W2073459066","https://openalex.org/W2073583237","https://openalex.org/W2085751730","https://openalex.org/W2104507648","https://openalex.org/W2114493937","https://openalex.org/W2118858274","https://openalex.org/W2122465391","https://openalex.org/W2123297508","https://openalex.org/W2126399065","https://openalex.org/W2127468194","https://openalex.org/W2148372359","https://openalex.org/W2149565746","https://openalex.org/W2150593711","https://openalex.org/W2170616854"],"related_works":["https://openalex.org/W2559422900","https://openalex.org/W3144143113","https://openalex.org/W2491448268","https://openalex.org/W2181939267","https://openalex.org/W2892323093","https://openalex.org/W3174322327","https://openalex.org/W2160785859","https://openalex.org/W4306940721","https://openalex.org/W3120229345","https://openalex.org/W2394193399"],"abstract_inverted_index":{"Clustering":[0],"problems":[1],"have":[2,120],"numerous":[3],"applications":[4],"and":[5,45,50,98,159],"are":[6,76,173,188,193],"becoming":[7],"more":[8],"challenging":[9],"as":[10,107],"the":[11,14,31,43,68,95,114,150,163,179,196],"size":[12,97],"of":[13,135,153],"data":[15,96,116,185],"increases.":[16],"In":[17],"this":[18],"paper,":[19],"we":[20,66,201],"consider":[21],"designing":[22],"clustering":[23,47,55,74,104],"algorithms":[24,56,75,90,119,155,161,192,199],"that":[25,71,169,187,200],"can":[26],"be":[27,124],"used":[28,125],"in":[29,77,126,131],"MapReduce,":[30],"most":[32],"popular":[33,46],"programming":[34],"environment":[35],"for":[36,162],"processing":[37],"large":[38],"datasets.":[39],"We":[40,52,138,148],"focus":[41],"on":[42,113,184],"practical":[44],"problems,":[48],"k-center":[49],"k-median.":[51],"develop":[53],"fast":[54],"with":[57],"constant":[58,133],"factor":[59],"approximation":[60],"guarantees.":[61],"From":[62],"a":[63,79,101,132],"theoretical":[64,80],"perspective,":[65],"give":[67],"first":[69],"analysis":[70],"shows":[72],"several":[73,157],"MRC0,":[78],"MapReduce":[81,136],"class":[82],"introduced":[83],"by":[84,142],"Karloff":[85],"et":[86],"al.":[87],"[26].":[88],"Our":[89,118],"use":[91],"sampling":[92],"to":[93,123,156,175],"decrease":[94],"they":[99,129],"run":[100,130],"time":[102],"consuming":[103],"algorithm":[105,112],"such":[106],"local":[108],"search":[109],"or":[110,176],"Lloyd's":[111],"resulting":[115],"set.":[117],"sufficient":[121],"flexibility":[122],"practice":[127],"since":[128],"number":[134],"rounds.":[137],"complement":[139],"these":[140],"results":[141],"performing":[143],"experiments":[144,167],"using":[145],"our":[146,154,170,191],"algorithms.":[147],"compare":[149],"empirical":[151],"performance":[152],"sequential":[158],"parallel":[160,198],"k-median":[164],"problem.":[165],"The":[166],"show":[168],"algorithms'":[171,181],"solutions":[172],"similar":[174],"better":[177],"than":[178,195],"other":[180,197],"solutions.":[182],"Furthermore,":[183],"sets":[186],"sufficiently":[189],"large,":[190],"faster":[194],"tested.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":20},{"year":2018,"cited_by_count":18},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":15},{"year":2015,"cited_by_count":31},{"year":2014,"cited_by_count":41},{"year":2013,"cited_by_count":19},{"year":2012,"cited_by_count":11}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
