{"id":"https://openalex.org/W2002298107","doi":"https://doi.org/10.1145/1376616.1376698","title":"Finding frequent items in probabilistic data","display_name":"Finding frequent items in probabilistic data","publication_year":2008,"publication_date":"2008-06-09","ids":{"openalex":"https://openalex.org/W2002298107","doi":"https://doi.org/10.1145/1376616.1376698","mag":"2002298107"},"language":"en","primary_location":{"id":"doi:10.1145/1376616.1376698","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1376616.1376698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2008 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100418224","display_name":"Qin Zhang","orcid":"https://orcid.org/0000-0002-6851-3115"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Qin Zhang","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong","Hong Kong University Of Science And Technology, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]},{"raw_affiliation_string":"Hong Kong University Of Science And Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450462","display_name":"Li Fei-Fei","orcid":"https://orcid.org/0000-0002-7481-0810"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feifei Li","raw_affiliation_strings":["Florida State University, Tallahassee, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahassee, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009196125","display_name":"Ke Yi","orcid":"https://orcid.org/0000-0002-2178-3716"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ke Yi","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong","Hong Kong University Of Science And Technology, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]},{"raw_affiliation_string":"Hong Kong University Of Science And Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100418224"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":null,"apc_paid":null,"fwci":22.7901,"has_fulltext":false,"cited_by_count":179,"citation_normalized_percentile":{"value":0.99687296,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.8674803972244263},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7388256788253784},{"id":"https://openalex.org/keywords/uncertain-data","display_name":"Uncertain data","score":0.7353962063789368},{"id":"https://openalex.org/keywords/probabilistic-database","display_name":"Probabilistic database","score":0.6723560690879822},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5548207759857178},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5333893895149231},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5254991054534912},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5209637880325317},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4806291460990906},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.4456339180469513},{"id":"https://openalex.org/keywords/probabilistic-relevance-model","display_name":"Probabilistic relevance model","score":0.4421535134315491},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.42252999544143677},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3521837592124939},{"id":"https://openalex.org/keywords/probabilistic-analysis-of-algorithms","display_name":"Probabilistic analysis of algorithms","score":0.2633207142353058},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2488066554069519},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23467803001403809},{"id":"https://openalex.org/keywords/database-theory","display_name":"Database theory","score":0.1169879138469696},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.09149008989334106}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.8674803972244263},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7388256788253784},{"id":"https://openalex.org/C2778865114","wikidata":"https://www.wikidata.org/wiki/Q7882489","display_name":"Uncertain data","level":2,"score":0.7353962063789368},{"id":"https://openalex.org/C174539288","wikidata":"https://www.wikidata.org/wiki/Q7246853","display_name":"Probabilistic database","level":4,"score":0.6723560690879822},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5548207759857178},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5333893895149231},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5254991054534912},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5209637880325317},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4806291460990906},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.4456339180469513},{"id":"https://openalex.org/C143017306","wikidata":"https://www.wikidata.org/wiki/Q3318133","display_name":"Probabilistic relevance model","level":4,"score":0.4421535134315491},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.42252999544143677},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3521837592124939},{"id":"https://openalex.org/C24404364","wikidata":"https://www.wikidata.org/wiki/Q7246846","display_name":"Probabilistic analysis of algorithms","level":3,"score":0.2633207142353058},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2488066554069519},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23467803001403809},{"id":"https://openalex.org/C12439846","wikidata":"https://www.wikidata.org/wiki/Q4809258","display_name":"Database theory","level":3,"score":0.1169879138469696},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.09149008989334106},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/1376616.1376698","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1376616.1376698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2008 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.141.1651","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.141.1651","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.ust.hk/~qinzhang/papers/fp337-zhang.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.144.7428","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.144.7428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.ust.hk/~yike/sigmod08.pdf","raw_type":"text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-20469","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-20469","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1203261677","https://openalex.org/W1506285740","https://openalex.org/W1612155886","https://openalex.org/W1675727887","https://openalex.org/W1970779762","https://openalex.org/W1973266211","https://openalex.org/W1977141583","https://openalex.org/W1993482412","https://openalex.org/W2003262311","https://openalex.org/W2017062116","https://openalex.org/W2044494469","https://openalex.org/W2050290319","https://openalex.org/W2065538099","https://openalex.org/W2069980026","https://openalex.org/W2078686663","https://openalex.org/W2080133348","https://openalex.org/W2080745194","https://openalex.org/W2088422262","https://openalex.org/W2097995023","https://openalex.org/W2099700811","https://openalex.org/W2105423800","https://openalex.org/W2106333474","https://openalex.org/W2108009586","https://openalex.org/W2110037957","https://openalex.org/W2110993096","https://openalex.org/W2113139394","https://openalex.org/W2118178019","https://openalex.org/W2120825705","https://openalex.org/W2129035130","https://openalex.org/W2138271690","https://openalex.org/W2138414767","https://openalex.org/W2138745909","https://openalex.org/W2142099410","https://openalex.org/W2142151675","https://openalex.org/W2151322578","https://openalex.org/W2164688391","https://openalex.org/W2166290564","https://openalex.org/W2166767032","https://openalex.org/W2166916904","https://openalex.org/W2167445567","https://openalex.org/W2167973519","https://openalex.org/W2171776999","https://openalex.org/W4206137901","https://openalex.org/W4250544186","https://openalex.org/W6651238488","https://openalex.org/W6672615329","https://openalex.org/W6680722260","https://openalex.org/W6684648153"],"related_works":["https://openalex.org/W1774414873","https://openalex.org/W4238495367","https://openalex.org/W2749065928","https://openalex.org/W2347240150","https://openalex.org/W2739264497","https://openalex.org/W2105402307","https://openalex.org/W2000023774","https://openalex.org/W123093449","https://openalex.org/W321922482","https://openalex.org/W1793369032"],"abstract_inverted_index":{"Computing":[0],"statistical":[1,36],"information":[2,91],"on":[3,38,115,163],"probabilistic":[4,39,55,98],"data":[5,15,22,56,83,99,131],"has":[6,121],"attracted":[7],"a":[8,18,54,73,111,146],"lot":[9],"of":[10,21,65,76,80,96,158],"attention":[11],"recently,":[12],"as":[13,165],"the":[14,42,50,62,77,81,93,97,101,105,116,137,156],"generated":[16,148],"from":[17],"wide":[19],"range":[20],"sources":[23],"are":[24,140],"inherently":[25],"fuzzy":[26],"or":[27],"uncertain.":[28],"In":[29],"this":[30,86],"paper,":[31],"we":[32,109],"study":[33,157],"an":[34,66],"important":[35,90],"query":[37,127],"data:":[40],"finding":[41],"frequent":[43,51,144,160],"items.":[44],"One":[45],"straightforward":[46],"approach":[47,152],"to":[48,59,134,142,155],"identify":[49],"items":[52,138,161],"in":[53,129,145],"set":[57],"is":[58],"simply":[60],"compute":[61],"expected":[63,78],"frequency":[64],"item":[67],"and":[68,100],"decide":[69],"if":[70],"it":[71],"exceeds":[72],"certain":[74],"fraction":[75],"size":[79],"whole":[82],"set.":[84],"However,":[85],"simple":[87],"definition":[88,113],"misses":[89],"about":[92],"internal":[94],"structure":[95],"interplay":[102],"among":[103],"all":[104,136],"uncertain":[106,130],"entities.":[107],"Thus,":[108],"propose":[110],"new":[112],"based":[114,162],"possible":[117,149],"world":[118],"semantics":[119],"that":[120,139],"been":[122],"widely":[123],"adopted":[124],"for":[125],"many":[126],"types":[128],"management,":[132],"trying":[133],"find":[135],"likely":[141],"be":[143],"randomly":[147],"world.":[150],"Our":[151],"naturally":[153],"leads":[154],"ranking":[159],"confidence":[164],"well.":[166]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":15},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":20},{"year":2013,"cited_by_count":16},{"year":2012,"cited_by_count":26}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
