{"id":"https://openalex.org/W1978036582","doi":"https://doi.org/10.1145/1297332.1297338","title":"Assessing data mining results via swap randomization","display_name":"Assessing data mining results via swap randomization","publication_year":2007,"publication_date":"2007-12-01","ids":{"openalex":"https://openalex.org/W1978036582","doi":"https://doi.org/10.1145/1297332.1297338","mag":"1978036582"},"language":"en","primary_location":{"id":"doi:10.1145/1297332.1297338","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1297332.1297338","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022164041","display_name":"Aristides Gionis","orcid":"https://orcid.org/0000-0002-5211-112X"},"institutions":[{"id":"https://openalex.org/I2800095910","display_name":"Yahoo (Spain)","ror":"https://ror.org/03gq8sg42","country_code":"ES","type":"company","lineage":["https://openalex.org/I2800095910","https://openalex.org/I4210134091"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Aristides Gionis","raw_affiliation_strings":["Yahoo! Research, Barcelona, Spain","Yahoo&excl; Research, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Yahoo! Research, Barcelona, Spain","institution_ids":["https://openalex.org/I2800095910"]},{"raw_affiliation_string":"Yahoo&excl; Research, Barcelona, Spain","institution_ids":["https://openalex.org/I2800095910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013005096","display_name":"Heikki Mannila","orcid":null},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Heikki Mannila","raw_affiliation_strings":["University of Helsinki and Helsinki University of Technology, Helsinki, Finland","University of Helsinki and Helsinki University of Technology, Helsinki, Finland#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Helsinki and Helsinki University of Technology, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]},{"raw_affiliation_string":"University of Helsinki and Helsinki University of Technology, Helsinki, Finland#TAB#","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047974472","display_name":"Taneli Mielik\u00e4inen","orcid":null},"institutions":[{"id":"https://openalex.org/I72090969","display_name":"Nokia (United States)","ror":"https://ror.org/038km2573","country_code":"US","type":"company","lineage":["https://openalex.org/I2738502077","https://openalex.org/I72090969"]},{"id":"https://openalex.org/I173498003","display_name":"Palo Alto Research Center","ror":"https://ror.org/0529fxt39","country_code":"US","type":"facility","lineage":["https://openalex.org/I173498003","https://openalex.org/I4210132870"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taneli Mielik\u00e4inen","raw_affiliation_strings":["Nokia Research Center, Palo Alto, CA","Nokia Research Center., Palo Alto, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Nokia Research Center, Palo Alto, CA","institution_ids":["https://openalex.org/I173498003","https://openalex.org/I72090969"]},{"raw_affiliation_string":"Nokia Research Center., Palo Alto, CA#TAB#","institution_ids":["https://openalex.org/I173498003","https://openalex.org/I72090969"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066962453","display_name":"Panayiotis Tsaparas","orcid":"https://orcid.org/0000-0002-3490-1507"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210133358","display_name":"Search","ror":"https://ror.org/03f78hn46","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210133358"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Panayiotis Tsaparas","raw_affiliation_strings":["Search Labs, Microsoft Research, Mountain View, CA"],"affiliations":[{"raw_affiliation_string":"Search Labs, Microsoft Research, Mountain View, CA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210133358"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5022164041"],"corresponding_institution_ids":["https://openalex.org/I2800095910"],"apc_list":null,"apc_paid":null,"fwci":11.9971,"has_fulltext":false,"cited_by_count":219,"citation_normalized_percentile":{"value":0.98937034,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":"1","issue":"3","first_page":"14","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7017607688903809},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6824958324432373},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.536466121673584},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5336962938308716},{"id":"https://openalex.org/keywords/swap","display_name":"Swap (finance)","score":0.47968578338623047},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3208855092525482},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.28232336044311523}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7017607688903809},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6824958324432373},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.536466121673584},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5336962938308716},{"id":"https://openalex.org/C99821215","wikidata":"https://www.wikidata.org/wiki/Q1136583","display_name":"Swap (finance)","level":2,"score":0.47968578338623047},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3208855092525482},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28232336044311523},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1297332.1297338","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1297332.1297338","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.141.2607","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.141.2607","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.helsinki.fi/u/tmielika/pub/doc/swaps.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.76.3286","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.76.3286","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.helsinki.fi/u/gionis/papers/kdd06a.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2842395","https://openalex.org/W109881820","https://openalex.org/W1509162044","https://openalex.org/W1518647442","https://openalex.org/W1542164709","https://openalex.org/W1592987584","https://openalex.org/W1619437807","https://openalex.org/W1976352460","https://openalex.org/W1979943645","https://openalex.org/W1984941527","https://openalex.org/W2003554015","https://openalex.org/W2007278728","https://openalex.org/W2008881019","https://openalex.org/W2009449033","https://openalex.org/W2011756264","https://openalex.org/W2021903957","https://openalex.org/W2032262903","https://openalex.org/W2037835214","https://openalex.org/W2040820996","https://openalex.org/W2045816045","https://openalex.org/W2048570414","https://openalex.org/W2056760934","https://openalex.org/W2066277072","https://openalex.org/W2083991698","https://openalex.org/W2105494575","https://openalex.org/W2124533460","https://openalex.org/W2131020804","https://openalex.org/W2138309709","https://openalex.org/W2148606196","https://openalex.org/W2153624566","https://openalex.org/W2157520643","https://openalex.org/W2159123127","https://openalex.org/W2210278139","https://openalex.org/W4210309498","https://openalex.org/W4235234743","https://openalex.org/W4246168648"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W3125011624","https://openalex.org/W1508631387","https://openalex.org/W2370917603","https://openalex.org/W614339039","https://openalex.org/W2017776670","https://openalex.org/W2952760143","https://openalex.org/W2347897961","https://openalex.org/W2979236518"],"abstract_inverted_index":{"The":[0,99],"problem":[1],"of":[2,6,49,75,81,102,121,147,151,173,193,230],"assessing":[3],"the":[4,19,47,55,61,67,108,115,119,124,131,134,145,191,199,214,218,225,231,237,247],"significance":[5,31],"data":[7,82,152,219],"mining":[8,25,83,153,220],"results":[9,48,80,120,132,146,189,208],"on":[10,54,60,123,133,181,190],"high-dimensional":[11],"0--1":[12],"datasets":[13,105,166,213,236],"has":[14],"been":[15],"studied":[16],"extensively":[17],"in":[18],"literature.":[20],"For":[21],"problems":[22],"such":[23,40,50,155],"as":[24,41,63,114,156],"frequent":[26,157],"sets":[27,74],"and":[28,58,111,127,160,197,227],"finding":[29],"correlations,":[30],"testing":[32],"can":[33,140],"be":[34,141],"done":[35],"by":[36,217,246],"standard":[37],"statistical":[38],"tests":[39,51,68],"chi-square,":[42],"or":[43,77],"other":[44,78,235],"methods.":[45],"However,":[46],"depend":[52],"only":[53],"specific":[56],"attributes":[57],"not":[59,244],"dataset":[62],"a":[64,90,174,182],"whole.":[65],"Moreover,":[66],"are":[69],"difficult":[70],"to":[71,73,130,143,203],"apply":[72,198],"patterns":[76],"complex":[79],"algorithms.":[84],"In":[85],"this":[86,97],"article,":[87],"we":[88,170],"consider":[89],"simple":[91,183],"randomization":[92,138,195,201],"technique":[93,139],"that":[94,106,210,242],"deals":[95],"with":[96,167],"shortcoming.":[98],"approach":[100,177],"consists":[101],"producing":[103],"random":[104,165],"have":[107],"same":[109],"row":[110,226],"column":[112,228],"margins":[113,229],"given":[116,168,224],"dataset,":[117],"computing":[118],"interest":[122],"randomized":[125],"instances":[126],"comparing":[128],"them":[129],"actual":[135],"data.":[136],"This":[137],"used":[142],"assess":[144],"many":[148],"different":[149,194],"types":[150],"algorithms,":[154],"sets,":[158],"clustering,":[159],"spectral":[161],"analysis.":[162],"To":[163],"generate":[164],"margins,":[169],"use":[171],"variations":[172],"Markov":[175],"chain":[176],"which":[178],"is":[179,222,243],"based":[180],"swap":[184,200],"operation.":[185],"We":[186],"give":[187],"theoretical":[188],"efficiency":[192],"methods,":[196],"method":[202],"several":[204],"well-known":[205],"datasets.":[206],"Our":[207],"indicate":[209],"for":[211,234],"some":[212],"structure":[215,239],"discovered":[216,238],"algorithms":[221],"expected,":[223],"datasets,":[232],"while":[233],"conveys":[240],"information":[241],"captured":[245],"margin":[248],"counts.":[249]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":19},{"year":2015,"cited_by_count":17},{"year":2014,"cited_by_count":18},{"year":2013,"cited_by_count":24},{"year":2012,"cited_by_count":14}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
