{"id":"https://openalex.org/W3042441546","doi":"https://doi.org/10.1109/tkde.2021.3103819","title":"Frequency Estimation in Data Streams: Learning the Optimal Hashing Scheme","display_name":"Frequency Estimation in Data Streams: Learning the Optimal Hashing Scheme","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3042441546","doi":"https://doi.org/10.1109/tkde.2021.3103819","mag":"3042441546"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2021.3103819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2021.3103819","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://export.arxiv.org/pdf/2007.09261","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025940525","display_name":"Dimitris Bertsimas","orcid":"https://orcid.org/0000-0002-1985-1003"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dimitris Bertsimas","raw_affiliation_strings":["Sloan School of Management and the Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA, USA","[Operations Research Center and Sloan School of Management, Massachusetts Institute of Technology, 2167 Cambridge, Massachusetts, United States, (e-mail: dbertsim@mit.edu)]"],"affiliations":[{"raw_affiliation_string":"Sloan School of Management and the Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"[Operations Research Center and Sloan School of Management, Massachusetts Institute of Technology, 2167 Cambridge, Massachusetts, United States, (e-mail: dbertsim@mit.edu)]","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102900284","display_name":"Vassilis Digalakis","orcid":"https://orcid.org/0000-0001-6770-7543"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vassilis Digalakis","raw_affiliation_strings":["Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA, USA","[Operations Research Center, Massachusetts Institute of Technology, 2167 Cambridge, Massachusetts, United States, 02139-4307 (e-mail: vvdigalakis@gmail.com)]"],"affiliations":[{"raw_affiliation_string":"Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"[Operations Research Center, Massachusetts Institute of Technology, 2167 Cambridge, Massachusetts, United States, 02139-4307 (e-mail: vvdigalakis@gmail.com)]","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5025940525"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.01186091,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.7897402048110962},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7369906902313232},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.627623438835144},{"id":"https://openalex.org/keywords/coordinate-descent","display_name":"Coordinate descent","score":0.5865628719329834},{"id":"https://openalex.org/keywords/data-stream","display_name":"Data stream","score":0.5534915924072266},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5030316710472107},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.42403602600097656},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3355874717235565},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32829850912094116},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16440561413764954}],"concepts":[{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.7897402048110962},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7369906902313232},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.627623438835144},{"id":"https://openalex.org/C157553263","wikidata":"https://www.wikidata.org/wiki/Q5168004","display_name":"Coordinate descent","level":2,"score":0.5865628719329834},{"id":"https://openalex.org/C2778484313","wikidata":"https://www.wikidata.org/wiki/Q1172540","display_name":"Data stream","level":2,"score":0.5534915924072266},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5030316710472107},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.42403602600097656},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3355874717235565},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32829850912094116},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16440561413764954},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tkde.2021.3103819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2021.3103819","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"mag:3042441546","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2007.09261","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:dspace.mit.edu:1721.1/144082","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/144082","pdf_url":null,"source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv","raw_type":"http://purl.org/eprint/type/JournalArticle"},{"id":"doi:10.48550/arxiv.2007.09261","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2007.09261","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"mag:3042441546","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2007.09261","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1493892051","https://openalex.org/W1594031697","https://openalex.org/W1628908379","https://openalex.org/W1858168446","https://openalex.org/W1965972569","https://openalex.org/W1971402995","https://openalex.org/W1972525637","https://openalex.org/W1993284846","https://openalex.org/W2006355640","https://openalex.org/W2017851434","https://openalex.org/W2022858489","https://openalex.org/W2027973753","https://openalex.org/W2068714596","https://openalex.org/W2069980026","https://openalex.org/W2080234606","https://openalex.org/W2080745194","https://openalex.org/W2123845384","https://openalex.org/W2140960149","https://openalex.org/W2143321506","https://openalex.org/W2153273131","https://openalex.org/W2164338181","https://openalex.org/W2167973519","https://openalex.org/W2219727625","https://openalex.org/W2411707397","https://openalex.org/W2439904216","https://openalex.org/W2505774719","https://openalex.org/W2563592268","https://openalex.org/W2582351091","https://openalex.org/W2607264901","https://openalex.org/W2616732338","https://openalex.org/W2753629899","https://openalex.org/W2791559636","https://openalex.org/W2885801653","https://openalex.org/W2887192039","https://openalex.org/W2890643081","https://openalex.org/W2891784792","https://openalex.org/W2909813108","https://openalex.org/W2911964244","https://openalex.org/W2914354916","https://openalex.org/W2953880153","https://openalex.org/W2962771342","https://openalex.org/W2963836097","https://openalex.org/W2963853546","https://openalex.org/W2967227187","https://openalex.org/W2968986602","https://openalex.org/W3042645139","https://openalex.org/W3102476541","https://openalex.org/W3102722370","https://openalex.org/W4249843299","https://openalex.org/W6639080731","https://openalex.org/W6675354045","https://openalex.org/W6681198211","https://openalex.org/W6684181414","https://openalex.org/W6736495275","https://openalex.org/W6749569130","https://openalex.org/W6752067599","https://openalex.org/W6754415367","https://openalex.org/W6754991252","https://openalex.org/W6758564351","https://openalex.org/W6767105887","https://openalex.org/W6921652704"],"related_works":["https://openalex.org/W1992253014","https://openalex.org/W3106527528","https://openalex.org/W197827279","https://openalex.org/W2946064759","https://openalex.org/W2805134925","https://openalex.org/W3085799379","https://openalex.org/W3153361501","https://openalex.org/W2336920772","https://openalex.org/W3170032932","https://openalex.org/W2997574527","https://openalex.org/W2153246527","https://openalex.org/W2803553068","https://openalex.org/W3105192812","https://openalex.org/W2441222368","https://openalex.org/W2982425134","https://openalex.org/W2025624833","https://openalex.org/W3173225030","https://openalex.org/W2296107147","https://openalex.org/W2771421526","https://openalex.org/W2279147091"],"abstract_inverted_index":{"We":[0,65,138,154],"present":[1],"a":[2,120],"novel":[3],"approach":[4,49,143,159],"for":[5,83],"the":[6,37,41,47,61,87,128,141,157],"problem":[7],"of":[8,40,168,172,185,189],"frequency":[9,26,38,63],"estimation":[10,27,177,190],"in":[11,86,119,132,170,183],"data":[12,42],"streams":[13],"that":[14,156],"is":[15],"based":[16],"on":[17,32,145,149],"optimization":[18,71],"and":[19,59,148,179],"machine":[20,94],"learning.":[21],"Contrary":[22],"to":[23,35,55,76,96,126,165],"state-of-the-art":[24],"streaming":[25],"algorithms,":[28],"which":[29,73],"heavily":[30],"rely":[31],"random":[33],"hashing":[34,81],"maintain":[36],"distribution":[39],"steam":[43],"using":[44,135],"limited":[45],"storage,":[46],"proposed":[48,129,142,158],"exploits":[50],"an":[51,67,103],"observed":[52,88],"stream":[53,89],"prefix":[54],"near-optimally":[56],"hash":[57,97],"elements":[58,84],"compress":[60],"target":[62],"distribution.":[64],"develop":[66,102],"exact":[68],"mixed-integer":[69],"linear":[70,133],"formulation,":[72],"enables":[74],"us":[75],"compute":[77],"optimal":[78],"or":[79],"near-optimal":[80],"schemes":[82],"seen":[85],"prefix;":[90],"then,":[91],"we":[92,101,111,123],"use":[93],"learning":[95],"unseen":[98],"elements.":[99],"Further,":[100],"efficient":[104],"block":[105],"coordinate":[106],"descent":[107],"algorithm,":[108],"which,":[109],"as":[110],"empirically":[112,139],"show,":[113],"produces":[114],"high":[115],"quality":[116],"solutions,":[117],"and,":[118],"special":[121],"case,":[122],"are":[124],"able":[125],"solve":[127],"formulation":[130],"exactly":[131],"time":[134],"dynamic":[136],"programming.":[137],"evaluate":[140],"both":[144],"synthetic":[146],"datasets":[147],"real-world":[150],"search":[151],"query":[152],"data.":[153],"show":[155],"outperforms":[160],"existing":[161],"approaches":[162],"by":[163,180],"one":[164],"two":[166],"orders":[167],"magnitude":[169,188],"terms":[171,184],"its":[173,186],"average":[174],"(per":[175],"element)":[176],"error":[178],"45-90":[181],"percent":[182],"expected":[187],"error.":[191]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
