{"id":"https://openalex.org/W2098571654","doi":"https://doi.org/10.1145/1376616.1376697","title":"An efficient filter for approximate membership checking","display_name":"An efficient filter for approximate membership checking","publication_year":2008,"publication_date":"2008-06-09","ids":{"openalex":"https://openalex.org/W2098571654","doi":"https://doi.org/10.1145/1376616.1376697","mag":"2098571654"},"language":"en","primary_location":{"id":"doi:10.1145/1376616.1376697","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1376616.1376697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2008 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113509258","display_name":"Kaushik Chakrabarti","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kaushik Chakrabarti","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038037154","display_name":"Surajit Chaudhuri","orcid":"https://orcid.org/0000-0001-8252-5270"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Surajit Chaudhuri","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Venkatesh Ganti","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Venkatesh Ganti","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101406351","display_name":"Dong Xin","orcid":"https://orcid.org/0000-0002-1414-9354"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Xin","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113509258"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":18.1811,"has_fulltext":false,"cited_by_count":71,"citation_normalized_percentile":{"value":0.99202336,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"805","last_page":"818"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.8092453479766846},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8087958097457886},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6074205636978149},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4350094795227051},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4260711073875427},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3805568218231201},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34775036573410034},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.12547829747200012}],"concepts":[{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.8092453479766846},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8087958097457886},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6074205636978149},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4350094795227051},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4260711073875427},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3805568218231201},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34775036573410034},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.12547829747200012}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1376616.1376697","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1376616.1376697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2008 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W8870360","https://openalex.org/W191231183","https://openalex.org/W192724328","https://openalex.org/W1495909593","https://openalex.org/W1502916507","https://openalex.org/W1541936324","https://openalex.org/W1983327091","https://openalex.org/W2041873531","https://openalex.org/W2064784316","https://openalex.org/W2096598900","https://openalex.org/W2099908986","https://openalex.org/W2099964107","https://openalex.org/W2121516976","https://openalex.org/W2123845384","https://openalex.org/W2127675794","https://openalex.org/W2134206624","https://openalex.org/W2138729491","https://openalex.org/W2140129471","https://openalex.org/W2154127540","https://openalex.org/W2159491434","https://openalex.org/W2160484851","https://openalex.org/W2161936973","https://openalex.org/W2621280964","https://openalex.org/W6600367688","https://openalex.org/W6607776381","https://openalex.org/W6629956336","https://openalex.org/W6674576723","https://openalex.org/W6683401941"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2053286651","https://openalex.org/W4231775656","https://openalex.org/W2181743346","https://openalex.org/W2187401768","https://openalex.org/W2046435967","https://openalex.org/W2181413294","https://openalex.org/W2989452537","https://openalex.org/W2477549100"],"abstract_inverted_index":{"We":[0,90],"consider":[1],"the":[2,77,81,92,108],"problem":[3,23],"of":[4,7,17,96,114],"identifying":[5,38],"sub-strings":[6,66,79],"input":[8],"text":[9,35],"strings":[10],"that":[11,67,104],"approximately":[12],"match":[13,69],"with":[14,70],"some":[15],"member":[16],"a":[18,49,54],"potentially":[19],"large":[20],"dictionary.":[21,82],"This":[22],"arises":[24],"in":[25,112],"several":[26],"important":[27],"applications":[28],"such":[29],"as":[30],"extracting":[31],"named":[32],"entities":[33],"from":[34,41],"documents":[36],"and":[37,52,74,94,102,118],"biological":[39],"concepts":[40],"biomedical":[42],"literature.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47,61],"develop":[48],"filter-verification":[50],"framework,":[51],"propose":[53],"novel":[55],"in-memory":[56],"filter":[57,64,98],"structure.":[58],"That":[59],"is,":[60],"first":[62],"quickly":[63],"out":[65],"cannot":[68],"any":[71],"dictionary":[72],"member,":[73],"then":[75],"verify":[76],"remaining":[78],"against":[80],"Our":[83],"method":[84],"does":[85],"not":[86],"produce":[87],"false":[88],"negatives.":[89],"demonstrate":[91],"efficiency":[93],"effectiveness":[95],"our":[97],"over":[99],"real":[100],"datasets,":[101],"show":[103],"it":[105],"significantly":[106],"outperforms":[107],"previous":[109],"best-known":[110],"methods":[111],"terms":[113],"both":[115],"filtering":[116],"power":[117],"computation":[119],"time.":[120]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":12},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
