{"id":"https://openalex.org/W1972525637","doi":"https://doi.org/10.1145/1562764.1562789","title":"Finding the frequent items in streams of data","display_name":"Finding the frequent items in streams of data","publication_year":2009,"publication_date":"2009-10-01","ids":{"openalex":"https://openalex.org/W1972525637","doi":"https://doi.org/10.1145/1562764.1562789","mag":"1972525637"},"language":"en","primary_location":{"id":"doi:10.1145/1562764.1562789","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1562764.1562789","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031896681","display_name":"Graham Cormode","orcid":"https://orcid.org/0000-0002-0698-0922"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Graham Cormode","raw_affiliation_strings":["AT&amp;T Labs---Research, Florham Park, NJ"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs---Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058831095","display_name":"Marios Hadjieleftheriou","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marios Hadjieleftheriou","raw_affiliation_strings":["AT&amp;T Labs---Research, Florham Park, NJ"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs---Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":9.435,"has_fulltext":false,"cited_by_count":107,"citation_normalized_percentile":{"value":0.9811655,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"52","issue":"10","first_page":"97","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/streams","display_name":"STREAMS","score":0.7557305097579956},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6084716320037842},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.44324737787246704},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27947789430618286},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10095098614692688}],"concepts":[{"id":"https://openalex.org/C42090638","wikidata":"https://www.wikidata.org/wiki/Q4048907","display_name":"STREAMS","level":2,"score":0.7557305097579956},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6084716320037842},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.44324737787246704},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27947789430618286},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10095098614692688}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1562764.1562789","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1562764.1562789","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.150.453","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.150.453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.att.com/~marioh/papers/cacm09.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W31696346","https://openalex.org/W1493892051","https://openalex.org/W1553409264","https://openalex.org/W1601184934","https://openalex.org/W1634257591","https://openalex.org/W1675727887","https://openalex.org/W1766932551","https://openalex.org/W2004110412","https://openalex.org/W2004154913","https://openalex.org/W2006355640","https://openalex.org/W2018989507","https://openalex.org/W2026784425","https://openalex.org/W2050290319","https://openalex.org/W2069980026","https://openalex.org/W2075567379","https://openalex.org/W2080234606","https://openalex.org/W2080745194","https://openalex.org/W2113139394","https://openalex.org/W2119714163","https://openalex.org/W2126922390","https://openalex.org/W2150569458","https://openalex.org/W2152637787","https://openalex.org/W2188351697","https://openalex.org/W2562755098","https://openalex.org/W2621193479","https://openalex.org/W4233811068","https://openalex.org/W6631035194"],"related_works":["https://openalex.org/W2083347156","https://openalex.org/W4288026155","https://openalex.org/W115787419","https://openalex.org/W2012528039","https://openalex.org/W1582424504","https://openalex.org/W2382069349","https://openalex.org/W1492078655","https://openalex.org/W2313030483","https://openalex.org/W2334651808","https://openalex.org/W205648239"],"abstract_inverted_index":{"Many":[0],"data":[1,8,69,89,97,106,374,405,590,1125],"generation":[2],"processes":[3],"can":[4,46,54,70,288,307,341,465,480,522,542,633,991,1064,1099,1111],"be":[5,47,91,127,215,245,289,308,318,342,467,981,1065,1100,1112],"modeled":[6],"as":[7,50,53,321,359,399,430,681,700,1058,1077],"streams.":[9,375],"They":[10],"produce":[11,907],"huge":[12],"numbers":[13],"of":[14,16,19,38,49,57,62,79,81,135,150,163,201,208,220,229,251,258,276,284,298,302,315,336,367,412,440,444,476,492,588,607,623,684,708,735,755,764,779,787,799,813,817,824,846,852,884,935,955,983,1023,1032,1050,1074,1080,1090,1092,1105,1118,1124],"pieces":[15],"data,":[17,887],"each":[18,132,609],"which":[20,26,140,339,422,604,756,987,1096,1106],"is":[21,100,365,378,393,416,428,447,592,599,626,669,771,849,864,896,941,977,1056],"simple":[22,267],"in":[23,76,198,205,248,268,373,396,449,524,639,717,809,828,904,966,988,1017,1131],"isolation,":[24],"but":[25,567,1063],"taken":[27],"together":[28],"lead":[29],"to":[30,41,103,110,113,123,129,131,146,160,244,310,317,381,384,418,535,546,601,647,664,677,719,724,748,759,774,839,882,900,906,932,943,951,980,1067,1127,1138],"a":[31,51,63,96,174,255,274,293,312,324,333,400,410,437,473,682,701,709,810,860,868,984,1078,1122],"complex":[32,687],"whole.":[33],"For":[34],"example,":[35,272],"the":[36,55,77,105,114,147,161,164,199,202,206,209,218,221,227,230,242,249,260,279,285,360,368,414,441,455,484,487,490,493,496,500,505,512,517,540,552,557,586,589,620,624,631,657,665,706,713,725,733,736,752,761,765,775,796,800,806,825,832,844,850,894,925,933,953,967,1005,1014,1024,1029,1047,1072,1119,1143],"sequence":[37,411],"queries":[39,544],"posed":[40],"an":[42,349,547,847,897,913,1081],"Internet":[43,548],"search":[44,326,549],"engine":[45],"thought":[48],"stream,":[52,222],"collection":[56],"transactions":[58],"across":[59],"all":[60,299,432,818],"branches":[61],"supermarket":[64],"chain.":[65],"In":[66,168,831,1009,1041],"aggregate,":[67],"this":[68,88,125,169,269,346,427,573,938,1010,1018],"arrive":[71],"at":[72,217,595],"enormous":[73],"rates,":[74],"easily":[75],"realm":[78],"hundreds":[80],"gigabytes":[82],"per":[83,192],"day":[84],"or":[85,223,253,504,949],"higher.":[86],"While":[87],"may":[90],"archived":[92],"and":[93,117,137,154,180,190,281,386,390,398,460,486,531,551,582,641,652,661,727,888,902,928,997,1039,1102],"indexed":[94],"within":[95,332,345,402,889],"warehouse,":[98],"it":[99,108,598,940,990],"also":[101,466,617,670,772,879,1045,1086],"important":[102,394,600,671,898,947],"process":[104],"\"as":[107],"happens,\"":[109],"provide":[111,992,1068,1087],"up":[112],"minute":[115],"analysis":[116,632,1152],"statistics":[118],"on":[119,226,254,483,510,696,793,924,937,1071,1103],"current":[120],"trends.":[121],"Methods":[122],"achieve":[124],"must":[126,181,214],"quick":[128],"respond":[130],"new":[133,537,610,918],"piece":[134],"information,":[136],"use":[138],"resources":[139,188],"are":[141,195,266,457,489,514,555,562,605,645,757,959,1060,1136,1146],"very":[142,612,627],"small":[143,643],"when":[144,224,454,462],"compared":[145],"total":[148,442],"quantity":[149],"data.":[151],"These":[152,561],"applications":[153,675,838],"others":[155],"like":[156],"them":[157],"have":[158,575,648,878,969],"led":[159],"formulation":[162],"so-called":[165],"\"streaming":[166],"model.\"":[167],"abstraction,":[170],"algorithms":[171,571,603,794,963,1027,1055,1094,1098,1107,1120,1144],"take":[172],"only":[173,259,994],"single":[175],"pass":[176],"over":[177,635,1121],"their":[178,722,729,1075,1129,1150],"input,":[179],"accurately":[182],"compute":[183,721],"various":[184],"functions":[185],"while":[186],"using":[187,1036],"(space":[189],"time":[191],"item)":[193],"that":[194,232,630,856,1145],"strictly":[196],"sublinear":[197],"size":[200,491,587],"input---ideally,":[203],"polynomial":[204],"logarithm":[207],"input":[210],"size.":[211],"The":[212,353,376,478,739,767],"output":[213,243,1076],"produced":[216,921],"end":[219],"queried":[225],"prefix":[228],"stream":[231,275,335,406,710],"has":[233,328],"been":[234,576,880,970],"observed":[235],"so":[236,629],"far.":[237],"(Other":[238],"variations":[239],"ask":[240],"for":[241,271,528,532,572,795,804,1028,1108],"maintained":[246],"continuously":[247],"presence":[250],"updates,":[252],"\"sliding":[256,811],"window\"":[257,812],"most":[261,369,424,501,714,1006,1025],"recent":[262,814],"updates.)":[263],"Some":[264],"problems":[265,340,790,1110],"model:":[270],"given":[273,409,458],"transactions,":[277],"finding":[278,431,697,805,1033],"mean":[280],"standard":[282],"deviation":[283],"bill":[286],"totals":[287],"accomplished":[290],"by":[291,578,791,821,1020],"retaining":[292],"few":[294],"\"sufficient":[295],"statistics\"":[296],"(sum":[297],"values,":[300,304],"sum":[301],"squared":[303],"etc.).":[305],"Others":[306],"shown":[309,448,1066],"require":[311],"large":[313,334,579],"amount":[314,934],"information":[316],"stored,":[319],"such":[320],"determining":[322],"whether":[323],"particular":[325],"query":[327],"already":[329],"appeared":[330],"anywhere":[331],"queries.":[337],"Determining":[338],"solved":[343,866],"effectively":[344],"model":[347],"remains":[348,912],"active":[350,914],"research":[351,919],"area.":[352],"frequent":[354,497,553,666,679,692,698,715,753,768,788,807,826,869,1034],"items":[355,421,433,456,479,498,513,541,554,667,680,699,716,769,789,808,827,870,877,1035],"problem":[356,363,377,415,471,574,668,770,895,927,1031],"(also":[357],"known":[358],"heavy":[361],"hitters":[362,834],")":[364],"one":[366,899],"heavily":[370],"studied":[371],"questions":[372],"popular":[379,502,559,777],"due":[380],"its":[382,387,929],"simplicity":[383],"state,":[385],"intuitive":[388],"interest":[389],"value.":[391],"It":[392,616,863,911],"both":[395],"itself,":[397],"subroutine":[401,683],"more":[403,686],"advanced":[404],"computations.":[407],"Informally,":[408],"items,":[413],"simply":[417,564],"find":[419,602,678],"those":[420],"occur":[423],"frequently.":[425],"Typically,":[426],"formalized":[429],"whose":[434],"frequency":[435,762],"exceeds":[436],"specified":[438],"fraction":[439],"number":[443,851],"items.":[445],"This":[446,469,520],"Figure":[450],"1.":[451],"Variations":[452],"arise":[453],"weights,":[459],"further":[461,654],"these":[463,1051,1054,1093],"weights":[464,488],"negative.":[468],"abstract":[470],"captures":[472],"wide":[474],"variety":[475,1123],"settings.":[477],"represent":[481,499,543],"packets":[482],"Internet,":[485],"packets.":[494],"Then":[495],"destinations,":[503],"heaviest":[506],"bandwidth":[507],"users":[508],"(depending":[509],"how":[511],"extracted":[515,758],"from":[516,743,1149],"flow":[518],"identifiers).":[519],"knowledge":[521],"help":[523,655],"optimizing":[525],"routing":[526],"decisions,":[527],"in-network":[529],"caching,":[530],"planning":[533],"where":[534,570,962],"add":[536],"capacity.":[538],"Or,":[539],"made":[545],"engine,":[550],"now":[556],"(currently)":[558],"terms.":[560],"not":[563,1001,1147],"hypothetical":[565],"examples,":[566],"genuine":[568],"cases":[569,961],"applied":[577,881],"corporations:":[580],"AT&amp;T":[581],"Google,":[583],"respectively.":[584],"Given":[585],"(which":[591],"being":[593],"generated":[594],"high":[596],"speed),":[597],"capable":[606],"processing":[608],"update":[611],"quickly,":[613],"without":[614],"blocking.":[615],"helps":[618],"if":[619],"working":[621],"space":[622],"algorithm":[625,871],"small,":[628],"happen":[634],"many":[636,673,829,917,1091],"different":[637,1109],"groups":[638],"parallel,":[640],"because":[642],"structures":[644],"likely":[646],"better":[649],"cache":[650],"behavior":[651],"hence":[653],"increase":[656],"throughput.":[658],"Obtaining":[659],"efficient":[660,908],"scalable":[662],"solutions":[663],"since":[672],"streaming":[674,886,909],"need":[676],"another,":[685],"computation.":[688],"Most":[689],"directly,":[690],"mining":[691],"itemsets":[693],"inherently":[694],"builds":[695],"basic":[702],"building":[703,792],"block.":[704],"Finding":[705],"entropy":[707,734],"requires":[711],"learning":[712],"order":[718,905],"directly":[720],"contribution":[723,730],"entropy,":[726],"remove":[728],"before":[731],"approximating":[732],"residual":[737],"stream.":[738,766],"HSS":[740],"(Hierarchical":[741],"Sampling":[742],"Sketches)":[744],"technique":[745],"uses":[746],"hashing":[747],"derive":[749],"multiple":[750],"substreams,":[751],"elements":[754],"estimate":[760],"moments":[763],"related":[773],"recently":[776],"area":[778],"Compressed":[780],"Sensing.":[781],"Other":[782],"work":[783,936,976],"solves":[784],"generalized":[785],"versions":[786],"\"vanilla\"":[797],"version":[798],"problem.":[801],"Several":[802],"techniques":[803],"updates":[815],"(instead":[816],"updates)":[819],"operate":[820],"keeping":[822],"track":[823],"sub-windows.":[830],"\"heavy":[833],"distinct\"":[835],"problem,":[836,939],"with":[837,859,872,916,993],"detecting":[840],"network":[841],"scanning":[842],"attacks,":[843],"count":[845],"item":[848,857],"distinct":[853,873],"pairs":[854],"containing":[855],"paired":[858],"secondary":[861],"item.":[862],"typically":[865],"extending":[867],"counting":[874],"algorithms.":[875,957,1052],"Frequent":[876],"models":[883],"probabilistic":[885],"faster":[890],"\"skipping\"":[891],"techniques.":[892],"Thus":[893],"understand":[901],"study":[903],"implementations.":[910],"area,":[915,1019],"contributions":[920],"every":[922],"year":[923],"core":[926,1030],"variations.":[930],"Due":[931],"easy":[942],"miss":[944],"out":[945],"some":[946,1022],"references":[948],"fail":[950],"appreciate":[952],"properties":[954],"certain":[956,985],"There":[958],"several":[960],"first":[964],"published":[965],"1980s":[968],"\"rediscovered\"":[971],"two":[972],"decades":[973],"later;":[974],"existing":[975],"sometimes":[978],"claimed":[979],"incapable":[982],"guarantee,":[986],"truth":[989],"minor":[995],"modifications;":[996],"experimental":[998,1116],"evaluations":[999],"do":[1000],"always":[1002],"compare":[1003],"against":[1004,1095],"suitable":[1007],"methods.":[1008],"paper,":[1011],"we":[1012,1044,1135],"present":[1013,1046],"main":[1015],"ideas":[1016],"describing":[1021],"significant":[1026],"common":[1037],"notation":[1038],"terminology.":[1040],"doing":[1042],"so,":[1043],"historical":[1048],"development":[1049],"Studying":[1053],"instructive,":[1057],"they":[1059],"relatively":[1061],"simple,":[1062],"formal":[1069],"guarantees":[1070],"quality":[1073],"function":[1079],"accuracy":[1082],"parameter":[1083],"\u03b5.":[1084],"We":[1085,1114],"baseline":[1088],"implementations":[1089],"future":[1097],"compared,":[1101],"top":[1104],"built.":[1113],"perform":[1115],"evaluation":[1117],"sets":[1126],"indicate":[1128],"performance":[1130],"practice.":[1132],"From":[1133],"this,":[1134],"able":[1137],"identify":[1139],"clear":[1140],"distinctions":[1141],"among":[1142],"apparent":[1148],"theoretical":[1151],"alone.":[1153]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":10},{"year":2013,"cited_by_count":11},{"year":2012,"cited_by_count":10}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
