{"id":"https://openalex.org/W2082276182","doi":"https://doi.org/10.1145/2382577.2382580","title":"Summarizing data succinctly with the most informative itemsets","display_name":"Summarizing data succinctly with the most informative itemsets","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2082276182","doi":"https://doi.org/10.1145/2382577.2382580","mag":"2082276182"},"language":"en","primary_location":{"id":"doi:10.1145/2382577.2382580","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2382577.2382580","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1904.11134","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Michael Mampaey","orcid":null},"institutions":[{"id":"https://openalex.org/I149213910","display_name":"University of Antwerp","ror":"https://ror.org/008x57b05","country_code":"BE","type":"education","lineage":["https://openalex.org/I149213910"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Michael Mampaey","raw_affiliation_strings":["University of Antwerp, Antwerp, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Antwerp, Antwerp, Belgium","institution_ids":["https://openalex.org/I149213910"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jilles Vreeken","orcid":null},"institutions":[{"id":"https://openalex.org/I149213910","display_name":"University of Antwerp","ror":"https://ror.org/008x57b05","country_code":"BE","type":"education","lineage":["https://openalex.org/I149213910"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Jilles Vreeken","raw_affiliation_strings":["University of Antwerp, Antwerp, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Antwerp, Antwerp, Belgium","institution_ids":["https://openalex.org/I149213910"]}]},{"author_position":"last","author":{"id":null,"display_name":"Nikolaj Tatti","orcid":null},"institutions":[{"id":"https://openalex.org/I149213910","display_name":"University of Antwerp","ror":"https://ror.org/008x57b05","country_code":"BE","type":"education","lineage":["https://openalex.org/I149213910"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Nikolaj Tatti","raw_affiliation_strings":["University of Antwerp, Antwerp, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Antwerp, Antwerp, Belgium","institution_ids":["https://openalex.org/I149213910"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I149213910"],"apc_list":null,"apc_paid":null,"fwci":9.5519,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.97622554,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"6","issue":"4","first_page":"1","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.616599977016449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.616599977016449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.10329999774694443,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.07000000029802322,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6998000144958496},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.6272000074386597},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5396999716758728},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.5038999915122986},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4699999988079071},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.4165000021457672},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.40779998898506165},{"id":"https://openalex.org/keywords/minimum-description-length","display_name":"Minimum description length","score":0.38960000872612}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6998000144958496},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6815999746322632},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.6272000074386597},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5877000093460083},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5396999716758728},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5038999915122986},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4699999988079071},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.4165000021457672},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C87465248","wikidata":"https://www.wikidata.org/wiki/Q1417790","display_name":"Minimum description length","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2937999963760376},{"id":"https://openalex.org/C2778865114","wikidata":"https://www.wikidata.org/wiki/Q7882489","display_name":"Uncertain data","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.273499995470047}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2382577.2382580","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2382577.2382580","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1904.11134","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.11134","pdf_url":"https://arxiv.org/pdf/1904.11134","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1904.11134","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.11134","pdf_url":"https://arxiv.org/pdf/1904.11134","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W37234579","https://openalex.org/W1513861746","https://openalex.org/W1534506107","https://openalex.org/W1864487875","https://openalex.org/W1879968191","https://openalex.org/W1973996621","https://openalex.org/W1977977741","https://openalex.org/W1978036582","https://openalex.org/W1979943645","https://openalex.org/W1990951910","https://openalex.org/W1995796255","https://openalex.org/W1996453240","https://openalex.org/W2001792610","https://openalex.org/W2018490621","https://openalex.org/W2029237138","https://openalex.org/W2054658115","https://openalex.org/W2055097264","https://openalex.org/W2055751864","https://openalex.org/W2061015052","https://openalex.org/W2062749891","https://openalex.org/W2064668866","https://openalex.org/W2066277072","https://openalex.org/W2076743544","https://openalex.org/W2099019567","https://openalex.org/W2100783210","https://openalex.org/W2102297485","https://openalex.org/W2111317215","https://openalex.org/W2115364986","https://openalex.org/W2116569872","https://openalex.org/W2124066753","https://openalex.org/W2125521284","https://openalex.org/W2134155244","https://openalex.org/W2135445066","https://openalex.org/W2153818052","https://openalex.org/W2156692937","https://openalex.org/W2160709761","https://openalex.org/W2168175751","https://openalex.org/W2210278139","https://openalex.org/W3003623942","https://openalex.org/W3123545922","https://openalex.org/W4232383088","https://openalex.org/W4235234743","https://openalex.org/W4253654031","https://openalex.org/W4288080327"],"related_works":[],"abstract_inverted_index":{"Knowledge":[0],"discovery":[1,49],"from":[2,225],"data":[3,16,71,102,197,295],"is":[4,217],"an":[5,55,243],"inherently":[6],"iterative":[7,56],"process.":[8],"That":[9],"is,":[10,95],"what":[11,23,207],"we":[12,25,63,83,109,115,143,156,170,241],"know":[13],"about":[14,34,212],"the":[15,35,73,86,91,98,101,105,117,138,141,163,174,180,189,196,213,248,267,275,279,294],"greatly":[17],"determines":[18],"our":[19,37,111,202,257],"expectations,":[20],"and":[21,126,151,231,262,272,288],"therefore,":[22],"results":[24],"would":[26],"find":[27,85],"interesting":[28],"and/or":[29],"surprising.":[30],"Given":[31],"new":[32],"knowledge":[33,48],"data,":[36,264],"expectations":[38],"will":[39,204],"change.":[40],"Hence,":[41],"in":[42,61,100,107,278],"order":[43],"to":[44,121,137,147,186,210,246],"avoid":[45],"redundant":[46],"results,":[47],"algorithms":[50],"ideally":[51],"should":[52],"follow":[53],"such":[54],"updating":[57],"procedure.":[58],"With":[59],"this":[60],"mind,":[62],"introduce":[64],"a":[65,78,218,226],"well-founded":[66],"approach":[67],"for":[68,96],"succinctly":[69],"summarizing":[70],"with":[72,135,297],"most":[74,92,133,166],"informative":[75,134,167],"itemsets;":[76],"using":[77,252,259],"probabilistic":[79,124],"maximum":[80,118,249],"entropy":[81,119,250],"model,":[82,140],"iteratively":[84],"itemset":[87],"that":[88,131,155,193,266,291],"provides":[89],"us":[90,104],"novel":[93],"information\u2014that":[94],"which":[97],"frequency":[99],"surprises":[103],"most\u2014and":[106],"turn":[108],"update":[110],"model":[112],"accordingly.":[113],"As":[114],"use":[116],"principle":[120,185],"obtain":[122],"unbiased":[123],"models,":[125],"only":[127],"include":[128],"those":[129],"itemsets":[130,192,224,230],"are":[132,145,234,270],"regard":[136],"current":[139],"summaries":[142,269],"construct":[144],"guaranteed":[146],"be":[148],"both":[149],"descriptive":[150],"nonredundant.":[152],"The":[153,281],"algorithm":[154],"present,":[157],"called":[158],"mtv,":[159],"can":[160,171],"either":[161,173],"discover":[162],"top-":[164],"k":[165],"itemsets,":[168],"or":[169,179],"employ":[172],"Bayesian":[175],"Information":[176],"Criterion":[177],"(bic)":[178],"Minimum":[181],"Description":[182],"Length":[183],"(mdl)":[184],"automatically":[187],"identify":[188,274],"set":[190],"of":[191],"together":[194],"summarize":[195,293],"well.":[198],"In":[199],"other":[200],"words,":[201],"method":[203,245],"\u201ctell":[205],"you":[206,208],"need":[209],"know\u201d":[211],"data.":[214,280],"Importantly,":[215],"it":[216],"one-phase":[219],"algorithm:":[220],"rather":[221],"than":[222],"picking":[223],"user-provided":[227],"candidate":[228],"set,":[229],"their":[232],"supports":[233],"mined":[235],"on-the-fly.":[236],"To":[237],"further":[238],"its":[239],"applicability,":[240],"provide":[242],"efficient":[244],"compute":[247],"distribution":[251],"Quick":[253],"Inclusion-Exclusion.":[254],"Experiments":[255],"on":[256],"method,":[258],"synthetic,":[260],"benchmark,":[261],"real":[263],"show":[265],"discovered":[268],"succinct,":[271],"correctly":[273],"key":[276],"patterns":[277],"models":[282],"they":[283,292],"form":[284],"attain":[285],"high":[286],"likelihoods,":[287],"inspection":[289],"shows":[290],"well":[296],"increasingly":[298],"specific,":[299],"yet":[300],"nonredundant":[301],"itemsets.":[302]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2016-06-24T00:00:00"}
