{"id":"https://openalex.org/W2140538566","doi":"https://doi.org/10.1109/tkde.2005.55","title":"Compression, clustering, and pattern discovery in very high-dimensional discrete-attribute data sets","display_name":"Compression, clustering, and pattern discovery in very high-dimensional discrete-attribute data sets","publication_year":2005,"publication_date":"2005-03-07","ids":{"openalex":"https://openalex.org/W2140538566","doi":"https://doi.org/10.1109/tkde.2005.55","mag":"2140538566"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2005.55","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2005.55","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035558591","display_name":"Mehmet Koyut\u00fcrk","orcid":"https://orcid.org/0000-0002-3434-5512"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"M. Koyuturk","raw_affiliation_strings":["Department of Computer Sciences, Purdue University, West Lafayette, IN, USA","Department of Computer Science, Purdue University, West Lafayette IN, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Department of Computer Science, Purdue University, West Lafayette IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019202832","display_name":"Ananth Grama","orcid":"https://orcid.org/0000-0002-9378-9244"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A. Grama","raw_affiliation_strings":["Department of Computer Sciences, Purdue University, West Lafayette, IN, USA","Department of Computer Science, Purdue University, West Lafayette IN, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Department of Computer Science, Purdue University, West Lafayette IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035052603","display_name":"Naren Ramakrishnan","orcid":"https://orcid.org/0000-0002-1821-9743"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]},{"id":"https://openalex.org/I36253440","display_name":"IEEE Computer Society","ror":"https://ror.org/05nxk6n24","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I36253440"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"N. Ramakrishnan","raw_affiliation_strings":["Department of Computer Science, Virginia Technology, Blacksburg, VA, USA","IEEE Computer Society#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Virginia Technology, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]},{"raw_affiliation_string":"IEEE Computer Society#TAB#","institution_ids":["https://openalex.org/I36253440"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035558591"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":16.1159,"has_fulltext":false,"cited_by_count":83,"citation_normalized_percentile":{"value":0.98739048,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"17","issue":"4","first_page":"447","last_page":"461"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.843264102935791},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.7940128445625305},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7361105680465698},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5401128530502319},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5119146108627319},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5047718286514282},{"id":"https://openalex.org/keywords/association-rule-learning","display_name":"Association rule learning","score":0.4960575997829437},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.48161906003952026},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4695318937301636},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.439439594745636},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.42655158042907715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2515013813972473},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1761438250541687}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.843264102935791},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.7940128445625305},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7361105680465698},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5401128530502319},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5119146108627319},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5047718286514282},{"id":"https://openalex.org/C193524817","wikidata":"https://www.wikidata.org/wiki/Q386780","display_name":"Association rule learning","level":2,"score":0.4960575997829437},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.48161906003952026},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4695318937301636},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.439439594745636},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.42655158042907715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2515013813972473},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1761438250541687},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tkde.2005.55","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2005.55","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.58.8844","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.8844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://people.cs.vt.edu/~ramakris/papers/Proximus_TKDE.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.85.3032","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.85.3032","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.purdue.edu/homes/koyuturk/papers/Proximus_TKDE.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W207882028","https://openalex.org/W244390303","https://openalex.org/W1484413656","https://openalex.org/W1492327544","https://openalex.org/W1506285740","https://openalex.org/W1507626491","https://openalex.org/W1548802052","https://openalex.org/W1576778118","https://openalex.org/W1597561788","https://openalex.org/W1601142477","https://openalex.org/W1603596181","https://openalex.org/W1612003148","https://openalex.org/W1965137541","https://openalex.org/W1966382716","https://openalex.org/W1972221303","https://openalex.org/W1977735777","https://openalex.org/W1985475123","https://openalex.org/W1994326354","https://openalex.org/W2041674806","https://openalex.org/W2043390904","https://openalex.org/W2049633694","https://openalex.org/W2057712948","https://openalex.org/W2070232376","https://openalex.org/W2072773380","https://openalex.org/W2104731482","https://openalex.org/W2116007667","https://openalex.org/W2125412267","https://openalex.org/W2127218421","https://openalex.org/W2148694408","https://openalex.org/W2150718242","https://openalex.org/W2335487254","https://openalex.org/W2886661658","https://openalex.org/W2913399920","https://openalex.org/W4205778870","https://openalex.org/W4232283483","https://openalex.org/W4244017338","https://openalex.org/W4292023222","https://openalex.org/W6608601624","https://openalex.org/W6628750762","https://openalex.org/W6635637936","https://openalex.org/W6636440780","https://openalex.org/W6678833645","https://openalex.org/W6678914141","https://openalex.org/W6753898695"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W3092506759","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W120741642","https://openalex.org/W138569904","https://openalex.org/W2390914021","https://openalex.org/W2389417819"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"an":[3],"efficient":[4],"framework":[5],"for":[6,43,55,148,172,206],"error-bounded":[7],"compression":[8,38],"of":[9,24,28,65,80,86,90,101,114,122,131,144,164,180,189,193],"high-dimensional":[10],"discrete-attribute":[11],"data":[12,15,34,46,58,95,105,139,147,209],"sets.":[13,47],"Such":[14],"sets,":[16,96],"which":[17,69],"frequently":[18],"arise":[19],"in":[20,33,88,99,106,124,186],"a":[21,53,61,107,162,204],"wide":[22],"variety":[23],"applications,":[25],"pose":[26],"some":[27],"the":[29,145,169,181,187],"most":[30],"significant":[31],"challenges":[32],"analysis.":[35],"Subsampling":[36],"and":[37,97,110,112,128,155,191,195,215],"are":[39],"two":[40],"key":[41],"technologies":[42],"analyzing":[44],"these":[45],"The":[48],"proposed":[49],"framework,":[50],"PROXIMUS,":[51],"provides":[52,152],"technique":[54],"reducing":[56,168],"large":[57,94],"sets":[59,140],"into":[60],"much":[62],"smaller":[63],"set":[64],"representative":[66],"patterns,":[67],"on":[68,137],"traditional":[70],"(expensive)":[71],"analysis":[72,173],"algorithms":[73,214],"can":[74],"be":[75],"applied":[76],"with":[77],"minimal":[78],"loss":[79],"accuracy.":[81],"We":[82,117,175],"show":[83,141,177],"desirable":[84],"properties":[85],"PROXIMUS":[87,123,185,202],"terms":[89,100,194],"runtime,":[91],"scalability":[92],"to":[93,103],"performance":[98],"capability":[102],"represent":[104],"compact":[108],"form":[109],"discovery":[111],"interpretation":[113],"interesting":[115],"patterns.":[116,219],"also":[118,176],"demonstrate":[119],"sample":[120],"applications":[121],"association":[125,149],"rule":[126,150],"mining":[127,151],"semantic":[129],"classification":[130,192],"term-document":[132],"matrices.":[133],"Our":[134],"experimental":[135],"results":[136],"real":[138],"that":[142],"use":[143],"compressed":[146],"excellent":[153,178],"precision":[154],"recall":[156],"values":[157],"(above":[158],"90":[159],"percent)":[160],"across":[161],"range":[163],"problem":[165],"parameters":[166],"while":[167],"time":[170],"required":[171],"drastically.":[174],"interpretability":[179],"patterns":[182],"discovered":[183],"by":[184],"context":[188],"clustering":[190],"documents.":[196],"In":[197],"doing":[198],"so,":[199],"we":[200],"establish":[201],"as":[203],"tool":[205],"both":[207],"preprocessing":[208],"before":[210],"applying":[211],"computationally":[212],"expensive":[213],"directly":[216],"extracting":[217],"correlated":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
