{"id":"https://openalex.org/W2146420558","doi":"https://doi.org/10.1145/1514894.1514926","title":"Tight results for clustering and summarizing data streams","display_name":"Tight results for clustering and summarizing data streams","publication_year":2009,"publication_date":"2009-03-23","ids":{"openalex":"https://openalex.org/W2146420558","doi":"https://doi.org/10.1145/1514894.1514926","mag":"2146420558"},"language":"en","primary_location":{"id":"doi:10.1145/1514894.1514926","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1514894.1514926","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1514894.1514926","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Database Theory","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/1514894.1514926","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114008024","display_name":"Sudipto Guha","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sudipto Guha","raw_affiliation_strings":["University of Pennsylvania, Philadelphia, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pennsylvania, Philadelphia, PA","institution_ids":["https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5114008024"],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":4.6409,"has_fulltext":true,"cited_by_count":48,"citation_normalized_percentile":{"value":0.95098169,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"268","last_page":"275"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8064777851104736},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.766128420829773},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6685739159584045},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.610451877117157},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.5879747271537781},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.5164023041725159},{"id":"https://openalex.org/keywords/data-stream","display_name":"Data stream","score":0.5069273114204407},{"id":"https://openalex.org/keywords/histogram","display_name":"Histogram","score":0.5024216175079346},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.4818536043167114},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4812454879283905},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.46675896644592285},{"id":"https://openalex.org/keywords/factor","display_name":"Factor (programming language)","score":0.42291679978370667},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4131513237953186},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3633134961128235},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32959315180778503},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.267356812953949},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.19804677367210388},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.16647863388061523},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14995184540748596},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11193779110908508}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8064777851104736},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.766128420829773},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6685739159584045},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.610451877117157},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.5879747271537781},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.5164023041725159},{"id":"https://openalex.org/C2778484313","wikidata":"https://www.wikidata.org/wiki/Q1172540","display_name":"Data stream","level":2,"score":0.5069273114204407},{"id":"https://openalex.org/C53533937","wikidata":"https://www.wikidata.org/wiki/Q185020","display_name":"Histogram","level":3,"score":0.5024216175079346},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.4818536043167114},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4812454879283905},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.46675896644592285},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.42291679978370667},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4131513237953186},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3633134961128235},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32959315180778503},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.267356812953949},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.19804677367210388},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.16647863388061523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14995184540748596},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11193779110908508},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/1514894.1514926","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1514894.1514926","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1514894.1514926","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Database Theory","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.upenn.edu:cis_papers-1420","is_oa":false,"landing_page_url":"https://repository.upenn.edu/cgi/viewcontent.cgi?article=1420&context=cis_papers","pdf_url":null,"source":{"id":"https://openalex.org/S4306402083","display_name":"ScholarlyCommons (University of Pennsylvania)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79576946","host_organization_name":"University of Pennsylvania","host_organization_lineage":["https://openalex.org/I79576946"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Departmental Papers (CIS)","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.214.8807","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.214.8807","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.377.6150","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.377.6150","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.edbt.org/Proceedings/2009-StPetersburg/icdt/papers/p0268-Guha.pdf","raw_type":"text"},{"id":"pmh:oai:repository.upenn.edu:20.500.14332/6440","is_oa":false,"landing_page_url":"https://repository.upenn.edu/handle/20.500.14332/6440","pdf_url":null,"source":{"id":"https://openalex.org/S4306402083","display_name":"ScholarlyCommons (University of Pennsylvania)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79576946","host_organization_name":"University of Pennsylvania","host_organization_lineage":["https://openalex.org/I79576946"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"published","raw_type":"Presentation"}],"best_oa_location":{"id":"doi:10.1145/1514894.1514926","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1514894.1514926","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1514894.1514926","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Database Theory","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G162342199","display_name":"CAREER: Information, Optimization and Approximation","funder_award_id":"0644119","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1755808366","display_name":null,"funder_award_id":"CCF-0430376CCF-0644119","funder_id":"https://openalex.org/F4320337387","funder_display_name":"Division of Computing and Communication Foundations"},{"id":"https://openalex.org/G7701956072","display_name":"Approximation Algorithms for Data Streams","funder_award_id":"0430376","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337387","display_name":"Division of Computing and Communication Foundations","ror":"https://ror.org/01mng8331"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2146420558.pdf","grobid_xml":"https://content.openalex.org/works/W2146420558.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1524387703","https://openalex.org/W1590291442","https://openalex.org/W1822348499","https://openalex.org/W1966395945","https://openalex.org/W1987752353","https://openalex.org/W2010410498","https://openalex.org/W2016910265","https://openalex.org/W2044028871","https://openalex.org/W2057058417","https://openalex.org/W2080068757","https://openalex.org/W2089265984","https://openalex.org/W2091684877","https://openalex.org/W2105910802","https://openalex.org/W2110704543","https://openalex.org/W2122731071","https://openalex.org/W2123297508","https://openalex.org/W2128869116","https://openalex.org/W2139276812","https://openalex.org/W2148588185","https://openalex.org/W2151242668","https://openalex.org/W2151616755","https://openalex.org/W2167816765","https://openalex.org/W2340787257","https://openalex.org/W4242587584"],"related_works":["https://openalex.org/W4310512815","https://openalex.org/W4389449520","https://openalex.org/W127192698","https://openalex.org/W2570600173","https://openalex.org/W2893008024","https://openalex.org/W2743735673","https://openalex.org/W4361801939","https://openalex.org/W2522231769","https://openalex.org/W4312214159","https://openalex.org/W2045938006"],"abstract_inverted_index":{"In":[0,18],"this":[1],"paper":[2],"we":[3,20],"investigate":[4],"algorithms":[5,38],"and":[6,25],"lower":[7,84],"bounds":[8,85,115,132],"for":[9,64,86],"summarization":[10],"problems":[11,41],"over":[12],"a":[13,30,57],"single":[14],"pass":[15],"data":[16,70],"stream.":[17],"particular":[19],"focus":[21],"on":[22,39,133],"histogram":[23],"construction":[24],"K-center":[26],"clustering.":[27],"We":[28,78,89],"provide":[29],"simple":[31],"framework":[32,55],"that":[33,91,95],"improves":[34],"upon":[35],"all":[36,129],"previous":[37],"these":[40,87,113,134],"in":[42],"either":[43],"the":[44,47,51,65,69,81,92,101,118],"space":[45],"bound,":[46],"approximation":[48,76],"factor":[49],"or":[50,106],"running":[52],"time.":[53],"The":[54],"uses":[56],"notion":[58],"of":[59,68,103,123,128],"\"streamstrapping\"":[60],"where":[61],"summaries":[62],"created":[63],"initial":[66],"prefixes":[67],"are":[71,116],"used":[72],"to":[73],"develop":[74],"better":[75],"algorithms.":[77],"also":[79],"prove":[80],"first":[82],"non-trivial":[83],"problems.":[88,135],"show":[90],"stricter":[93],"requirement":[94],"if":[96],"an":[97],"algorithm":[98],"accurately":[99],"approximates":[100],"error":[102],"every":[104,107],"bucket":[105],"cluster":[108],"produced":[109],"by":[110],"it,":[111],"then":[112],"upper":[114,131],"almost":[117],"best":[119],"possible.":[120],"This":[121],"property":[122],"accurate":[124],"estimation":[125],"is":[126],"true":[127],"known":[130]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
