{"id":"https://openalex.org/W4406458746","doi":"https://doi.org/10.1109/bigdata62323.2024.10825508","title":"Generalized compression and compressive search of large datasets","display_name":"Generalized compression and compressive search of large datasets","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458746","doi":"https://doi.org/10.1109/bigdata62323.2024.10825508"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825508","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825508","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112999409","display_name":"Morgan E. Prior","orcid":null},"institutions":[{"id":"https://openalex.org/I121934306","display_name":"Tufts University","ror":"https://ror.org/05wvpxv85","country_code":"US","type":"education","lineage":["https://openalex.org/I121934306"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Morgan E. Prior","raw_affiliation_strings":["Tufts University,Dept. of Computer Science,Boston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Tufts University,Dept. of Computer Science,Boston,MA,USA","institution_ids":["https://openalex.org/I121934306"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106456616","display_name":"Thomas J. Howard","orcid":"https://orcid.org/0000-0002-2927-1897"},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas J. Howard","raw_affiliation_strings":["University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA"],"affiliations":[{"raw_affiliation_string":"University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114400099","display_name":"Emily Light","orcid":null},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emily Light","raw_affiliation_strings":["University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA"],"affiliations":[{"raw_affiliation_string":"University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024517512","display_name":"Najib Ishaq","orcid":null},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Najib Ishaq","raw_affiliation_strings":["University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA"],"affiliations":[{"raw_affiliation_string":"University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071391690","display_name":"Noah M. Daniels","orcid":"https://orcid.org/0000-0002-9538-825X"},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Noah M. Daniels","raw_affiliation_strings":["University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA"],"affiliations":[{"raw_affiliation_string":"University of Rhode Island,Dept. of Computer Science and Statistics,Kingston,RI,USA","institution_ids":["https://openalex.org/I17626003"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5112999409"],"corresponding_institution_ids":["https://openalex.org/I121934306"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23737785,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5702","last_page":"5710"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.7535456418991089},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6035152077674866},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.510003924369812},{"id":"https://openalex.org/keywords/compressive-strength","display_name":"Compressive strength","score":0.459844708442688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27986904978752136},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.13273295760154724},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.10709023475646973}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.7535456418991089},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6035152077674866},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.510003924369812},{"id":"https://openalex.org/C30407753","wikidata":"https://www.wikidata.org/wiki/Q186191","display_name":"Compressive strength","level":2,"score":0.459844708442688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27986904978752136},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.13273295760154724},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.10709023475646973}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825508","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825508","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1521064342","https://openalex.org/W1988925586","https://openalex.org/W2034285706","https://openalex.org/W2074231493","https://openalex.org/W2092672051","https://openalex.org/W2123402141","https://openalex.org/W2128162768","https://openalex.org/W2148602530","https://openalex.org/W2167669612","https://openalex.org/W2219888463","https://openalex.org/W2787023214","https://openalex.org/W2949985202","https://openalex.org/W2963693826","https://openalex.org/W2998702515","https://openalex.org/W3006996401","https://openalex.org/W3114310315","https://openalex.org/W4212803274","https://openalex.org/W4214671568","https://openalex.org/W4252672146","https://openalex.org/W4310964066","https://openalex.org/W4362597616","https://openalex.org/W4386655612","https://openalex.org/W6601341176","https://openalex.org/W6636915900","https://openalex.org/W6699972550","https://openalex.org/W6856306521"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2612632602","https://openalex.org/W2321805087"],"abstract_inverted_index":{"The":[0],"Big":[1],"Data":[2],"explosion":[3],"has":[4],"necessitated":[5],"the":[6,66,70,75,79,96,104,116,164,170,210,221],"development":[7],"of":[8,65,78,107,111,115,146,169,180,218],"search":[9,22,54,84,161,204],"algorithms":[10,20,23,28],"that":[11,194,208],"scale":[12],"sub-linearly":[13],"in":[14,113,220],"time":[15,185],"and":[16,21,31,52,73,83,133,151,160,166,189],"memory.":[17],"While":[18],"compression":[19,156,175],"do":[24,34],"exist":[25],"independently,":[26],"few":[27],"offer":[29],"both,":[30],"those":[32,179],"which":[33,95],"are":[35],"domain-specific.":[36],"We":[37,140,154,192,213],"present":[38],"panCAKES,":[39],"a":[40,47,61,144],"novel":[41],"approach":[42],"to":[43,49,81,103,158,178],"compressive":[44,203],"search,":[45],"i.e.,":[46],"way":[48],"perform":[50],"k-NN":[51,188],"\u03c1-NN":[53,190],"on":[55,143,205],"compressed":[56,165],"data":[57,80],"while":[58,182],"only":[59],"decompressing":[60],"small,":[62],"relevant,":[63],"portion":[64],"data.":[67,153],"panCAKES":[68,87,142,173,195,219],"assumes":[69],"manifold":[71,211],"hypothesis":[72],"leverages":[74],"low-dimensional":[76],"structure":[77],"compress":[82],"it":[85],"efficiently.":[86],"is":[88,101,196],"generic":[89],"over":[90],"any":[91],"distance":[92,97,124],"function":[93],"for":[94,121,187,201],"between":[98,163],"two":[99],"points":[100],"proportional":[102],"memory":[105],"cost":[106],"storing":[108],"an":[109,197,215],"encoding":[110],"one":[112],"terms":[114],"other.":[117],"This":[118],"property":[119],"holds":[120],"many":[122],"widely-used":[123],"functions,":[125],"e.g.":[126],"string":[127],"edit":[128],"distances":[129],"(Levenshtein,":[130],"Needleman-Wunsch,":[131],"etc.)":[132],"set":[134,152],"dissimilarity":[135],"measures":[136],"(Jaccard,":[137],"Dice,":[138],"etc.).":[139],"benchmark":[141],"variety":[145],"datasets,":[147],"including":[148],"genomic,":[149],"proteomic,":[150],"compare":[155],"ratios":[157,176],"gzip,":[159,181],"performance":[162,186],"uncompressed":[167],"versions":[168],"same":[171],"dataset.":[172],"achieves":[174],"close":[177],"offering":[183],"sub-linear":[184],"search.":[191],"conclude":[193],"efficient,":[198],"general-purpose":[199],"algorithm":[200],"exact":[202],"large":[206],"datasets":[207],"obey":[209],"hypothesis.":[212],"provide":[214],"open-source":[216],"implementation":[217],"Rust":[222],"programming":[223],"language.":[224]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
