{"id":"https://openalex.org/W3012668526","doi":"https://doi.org/10.1145/3366423.3380183","title":"Real-Time Clustering for Large Sparse Online Visitor Data","display_name":"Real-Time Clustering for Large Sparse Online Visitor Data","publication_year":2020,"publication_date":"2020-04-20","ids":{"openalex":"https://openalex.org/W3012668526","doi":"https://doi.org/10.1145/3366423.3380183","mag":"3012668526"},"language":"en","primary_location":{"id":"doi:10.1145/3366423.3380183","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3366423.3380183","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Web Conference 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3366423.3380183","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091398218","display_name":"Gromit Yeuk-Yin Chan","orcid":"https://orcid.org/0000-0003-1356-4406"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gromit Yeuk-Yin Chan","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073516753","display_name":"Fan Du","orcid":"https://orcid.org/0000-0002-4383-8888"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Du","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009957887","display_name":"Ryan A. Rossi","orcid":"https://orcid.org/0000-0001-9758-0635"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan A. Rossi","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034026499","display_name":"Anup Rao","orcid":"https://orcid.org/0009-0004-7226-7860"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anup B. Rao","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071007779","display_name":"Eunyee Koh","orcid":"https://orcid.org/0000-0003-2091-5972"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eunyee Koh","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003584200","display_name":"Claudio Silva","orcid":"https://orcid.org/0000-0003-2452-2295"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cl\u00e1udio T. Silva","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006773757","display_name":"Juliana Freire","orcid":"https://orcid.org/0000-0003-3915-7075"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juliana Freire","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5091398218"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":1.0253,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.81121324,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1049","last_page":"1059"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11980","display_name":"Human Mobility and Location-Based Analysis","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/3313","display_name":"Transportation"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11980","display_name":"Human Mobility and Location-Based Analysis","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/3313","display_name":"Transportation"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8180752992630005},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7167285084724426},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5799272060394287},{"id":"https://openalex.org/keywords/visitor-pattern","display_name":"Visitor pattern","score":0.5685634613037109},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.555812418460846},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.5037862658500671},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4965546727180481},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.45517754554748535},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4463997483253479},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.43913397192955017},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4279102683067322},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4169422388076782},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2590978145599365},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2558493912220001},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.21603882312774658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8180752992630005},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7167285084724426},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5799272060394287},{"id":"https://openalex.org/C48947383","wikidata":"https://www.wikidata.org/wiki/Q830719","display_name":"Visitor pattern","level":2,"score":0.5685634613037109},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.555812418460846},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.5037862658500671},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4965546727180481},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.45517754554748535},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4463997483253479},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.43913397192955017},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4279102683067322},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4169422388076782},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2590978145599365},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2558493912220001},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.21603882312774658},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3366423.3380183","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3366423.3380183","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Web Conference 2020","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3366423.3380183","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3366423.3380183","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Web Conference 2020","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1484676273","https://openalex.org/W1502916507","https://openalex.org/W1530641188","https://openalex.org/W1571361236","https://openalex.org/W1585610988","https://openalex.org/W1597087655","https://openalex.org/W1736726159","https://openalex.org/W1991676464","https://openalex.org/W2002544066","https://openalex.org/W2016381774","https://openalex.org/W2025394193","https://openalex.org/W2028001415","https://openalex.org/W2033403400","https://openalex.org/W2055906546","https://openalex.org/W2093903345","https://openalex.org/W2097776316","https://openalex.org/W2116762767","https://openalex.org/W2130438374","https://openalex.org/W2141115795","https://openalex.org/W2142493242","https://openalex.org/W2143996849","https://openalex.org/W2150593711","https://openalex.org/W2151930506","https://openalex.org/W2165835468","https://openalex.org/W2294331997","https://openalex.org/W2341394227","https://openalex.org/W2399495295","https://openalex.org/W2415890883","https://openalex.org/W2478540127","https://openalex.org/W2495349325","https://openalex.org/W2507499466","https://openalex.org/W2520037496","https://openalex.org/W2606083510","https://openalex.org/W2751642492","https://openalex.org/W2753396776","https://openalex.org/W2766555770","https://openalex.org/W2893486522","https://openalex.org/W2902308725","https://openalex.org/W2906664357","https://openalex.org/W2911351672","https://openalex.org/W2960208834","https://openalex.org/W2962818688","https://openalex.org/W2962865691","https://openalex.org/W2963243812","https://openalex.org/W2963335963","https://openalex.org/W3004286518","https://openalex.org/W3105265400","https://openalex.org/W4229641819","https://openalex.org/W4249735943","https://openalex.org/W4301268929"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2027972911","https://openalex.org/W2146343568","https://openalex.org/W2013643406","https://openalex.org/W2157978810","https://openalex.org/W2597809628","https://openalex.org/W3046370962"],"abstract_inverted_index":{"Online":[0],"visitor":[1,210],"behaviors":[2],"are":[3],"often":[4,27],"modeled":[5],"as":[6],"a":[7,61,81,91,97,107,122,159,164,179,194],"large":[8],"sparse":[9,65,70],"matrix,":[10],"where":[11],"rows":[12],"represent":[13,17],"visitors":[14],"and":[15,80,111,128,133,148,167,188],"columns":[16],"behavior.":[18],"To":[19],"discover":[20],"customer":[21,204],"segments":[22,205],"with":[23,193],"different":[24,34],"hierarchies,":[25],"marketers":[26],"need":[28],"to":[29,42,77,163,202],"cluster":[30,84],"the":[31,39,51,74,87,95,184],"data":[32,146],"in":[33,170,183,212],"splits.":[35],"Such":[36],"analyses":[37],"require":[38],"clustering":[40,63,186],"algorithm":[41],"provide":[43],"real-time":[44,62],"responses":[45],"on":[46,126,142,174],"user":[47],"parameter":[48],"changes,":[49],"which":[50,105],"current":[52],"techniques":[53],"cannot":[54],"support.":[55],"In":[56],"this":[57],"paper,":[58],"we":[59,120,198],"propose":[60,121],"algorithm,":[64],"density":[66],"peaks,":[67],"for":[68,83,114],"large-scale":[69],"data.":[71,118],"It":[72],"pre-processes":[73],"input":[75],"points":[76],"compute":[78],"annotations":[79],"hierarchy":[82],"assignment.":[85],"While":[86],"assignment":[88],"is":[89,112],"only":[90],"single":[92],"scan":[93],"of":[94,172,208],"points,":[96],"naive":[98],"pre-processing":[99],"requires":[100],"measuring":[101],"all":[102],"pairwise":[103],"distances,":[104],"incur":[106],"quadratic":[108],"computation":[109],"overhead":[110],"infeasible":[113],"any":[115],"moderately":[116],"sized":[117],"Thus,":[119],"new":[123],"approach":[124,156],"based":[125],"MinHash":[127,166],"LSH":[129,168],"that":[130,144,154],"provides":[131,158],"fast":[132],"accurate":[134],"estimations.":[135],"We":[136],"also":[137],"describe":[138],"an":[139,200],"efficient":[140],"implementation":[141,169],"Spark":[143],"addresses":[145],"skew":[147],"memory":[149],"usage.":[150],"Our":[151],"experiments":[152],"show":[153],"our":[155],"(1)":[157],"better":[160],"approximation":[161],"compared":[162],"straightforward":[165],"terms":[171],"accuracy":[173],"real":[175],"datasets,":[176],"(2)":[177],"achieves":[178],"20":[180],"\u00d7":[181],"speedup":[182],"end-to-end":[185],"pipeline,":[187],"(3)":[189],"can":[190],"maintain":[191],"computations":[192],"small":[195],"memory.":[196],"Finally,":[197],"present":[199],"interface":[201],"explore":[203],"from":[206],"millions":[207],"online":[209],"records":[211],"real-time.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
