{"id":"https://openalex.org/W4386123435","doi":"https://doi.org/10.14778/3611479.3611480","title":"A Two-Level Signature Scheme for Stable Set Similarity Joins","display_name":"A Two-Level Signature Scheme for Stable Set Similarity Joins","publication_year":2023,"publication_date":"2023-07-01","ids":{"openalex":"https://openalex.org/W4386123435","doi":"https://doi.org/10.14778/3611479.3611480"},"language":"en","primary_location":{"id":"doi:10.14778/3611479.3611480","is_oa":false,"landing_page_url":"http://dx.doi.org/10.14778/3611479.3611480","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010481758","display_name":"Daniel Schmitt","orcid":"https://orcid.org/0009-0005-7656-7526"},"institutions":[{"id":"https://openalex.org/I182212641","display_name":"University of Salzburg","ror":"https://ror.org/05gs8cd61","country_code":"AT","type":"education","lineage":["https://openalex.org/I182212641"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Daniel Schmitt","raw_affiliation_strings":["University of Salzburg, Austria"],"affiliations":[{"raw_affiliation_string":"University of Salzburg, Austria","institution_ids":["https://openalex.org/I182212641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076017218","display_name":"Daniel Kocher","orcid":"https://orcid.org/0009-0003-3742-5555"},"institutions":[{"id":"https://openalex.org/I182212641","display_name":"University of Salzburg","ror":"https://ror.org/05gs8cd61","country_code":"AT","type":"education","lineage":["https://openalex.org/I182212641"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Daniel Kocher","raw_affiliation_strings":["University of Salzburg, Austria"],"affiliations":[{"raw_affiliation_string":"University of Salzburg, Austria","institution_ids":["https://openalex.org/I182212641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089328860","display_name":"Nikolaus Augsten","orcid":"https://orcid.org/0000-0002-3036-6201"},"institutions":[{"id":"https://openalex.org/I182212641","display_name":"University of Salzburg","ror":"https://ror.org/05gs8cd61","country_code":"AT","type":"education","lineage":["https://openalex.org/I182212641"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Nikolaus Augsten","raw_affiliation_strings":["University of Salzburg, Austria"],"affiliations":[{"raw_affiliation_string":"University of Salzburg, Austria","institution_ids":["https://openalex.org/I182212641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073874366","display_name":"Willi Mann","orcid":"https://orcid.org/0000-0002-6460-6306"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Willi Mann","raw_affiliation_strings":["Celonis SE, Germany"],"affiliations":[{"raw_affiliation_string":"Celonis SE, Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017559472","display_name":"Alexander R. Miller","orcid":"https://orcid.org/0009-0001-0701-0426"},"institutions":[{"id":"https://openalex.org/I182212641","display_name":"University of Salzburg","ror":"https://ror.org/05gs8cd61","country_code":"AT","type":"education","lineage":["https://openalex.org/I182212641"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Alexander Miller","raw_affiliation_strings":["University of Salzburg, Austria"],"affiliations":[{"raw_affiliation_string":"University of Salzburg, Austria","institution_ids":["https://openalex.org/I182212641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010481758"],"corresponding_institution_ids":["https://openalex.org/I182212641"],"apc_list":null,"apc_paid":null,"fwci":0.5094,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68511099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"16","issue":"11","first_page":"2686","last_page":"2698"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.8450559377670288},{"id":"https://openalex.org/keywords/signature","display_name":"Signature (topology)","score":0.7405459880828857},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.7129318118095398},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6785150170326233},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5813400149345398},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5533426403999329},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5180020928382874},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5171158313751221},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4925263822078705},{"id":"https://openalex.org/keywords/elgamal-signature-scheme","display_name":"ElGamal signature scheme","score":0.4731375277042389},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.41860640048980713},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41157066822052},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3618273138999939},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33903324604034424},{"id":"https://openalex.org/keywords/digital-signature","display_name":"Digital signature","score":0.29915958642959595},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23390436172485352},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.22456499934196472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19396370649337769},{"id":"https://openalex.org/keywords/blind-signature","display_name":"Blind signature","score":0.15136203169822693}],"concepts":[{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.8450559377670288},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.7405459880828857},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.7129318118095398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6785150170326233},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5813400149345398},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5533426403999329},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5180020928382874},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5171158313751221},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4925263822078705},{"id":"https://openalex.org/C132860680","wikidata":"https://www.wikidata.org/wiki/Q1328731","display_name":"ElGamal signature scheme","level":5,"score":0.4731375277042389},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.41860640048980713},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41157066822052},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3618273138999939},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33903324604034424},{"id":"https://openalex.org/C118463975","wikidata":"https://www.wikidata.org/wiki/Q220849","display_name":"Digital signature","level":3,"score":0.29915958642959595},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23390436172485352},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.22456499934196472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19396370649337769},{"id":"https://openalex.org/C18899389","wikidata":"https://www.wikidata.org/wiki/Q2736593","display_name":"Blind signature","level":4,"score":0.15136203169822693},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3611479.3611480","is_oa":false,"landing_page_url":"http://dx.doi.org/10.14778/3611479.3611480","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2016273390","https://openalex.org/W2037562342","https://openalex.org/W2065259291","https://openalex.org/W2073167797","https://openalex.org/W2096598900","https://openalex.org/W2097776316","https://openalex.org/W2105436061","https://openalex.org/W2121269638","https://openalex.org/W2121516976","https://openalex.org/W2122196799","https://openalex.org/W2147717514","https://openalex.org/W2167847032","https://openalex.org/W2261895596","https://openalex.org/W2275236596","https://openalex.org/W2294331997","https://openalex.org/W2396588571","https://openalex.org/W2430378630","https://openalex.org/W2574633002","https://openalex.org/W2795518213","https://openalex.org/W2798412430","https://openalex.org/W2903672378","https://openalex.org/W2948163032","https://openalex.org/W2963535486","https://openalex.org/W2964085916","https://openalex.org/W2982188423","https://openalex.org/W3102192406","https://openalex.org/W3147341252","https://openalex.org/W4281867900","https://openalex.org/W4300699024"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2012019886","https://openalex.org/W2091133150","https://openalex.org/W2158013543","https://openalex.org/W73805934","https://openalex.org/W2883952940","https://openalex.org/W2151838902","https://openalex.org/W1512094939","https://openalex.org/W2127939186","https://openalex.org/W3190616538"],"abstract_inverted_index":{"We":[0,53,128,151],"study":[1],"the":[2,56,59,67,78,83,86,101,118,141,145,148],"set":[3,183],"similarity":[4,162,184],"join":[5,173],"problem":[6],",":[7],"which":[8],"retrieves":[9],"all":[10],"pairs":[11],"of":[12,18,58,85,105,147,197],"similar":[13],"sets":[14,19,35],"from":[15],"two":[16,34,156],"collections":[17],"for":[20,160],"a":[21,29,63,71,95,123,130,136,187,194],"given":[22],"distance":[23],"function.":[24],"Existing":[25],"exact":[26],"solutions":[27],"employ":[28],"signature-based":[30],"filter-verification":[31],"framework:":[32],"If":[33],"are":[36,115,120],"similar,":[37],"they":[38,48],"must":[39],"have":[40],"at":[41],"least":[42],"one":[43],"signature":[44,60,73,97,107,114,149,158],"in":[45,167],"common,":[46],"otherwise":[47],"can":[49],"be":[50],"pruned":[51],"safely.":[52],"observe":[54],"that":[55,99,172,192],"choice":[57],"scheme":[61,74],"has":[62],"significant":[64],"impact":[65],"on":[66,82,186],"performance.":[68],"Unfortunately,":[69],"choosing":[70],"good":[72],"is":[75],"hard":[76],"because":[77],"performance":[79],"heavily":[80],"depends":[81],"characteristics":[84,177],"underlying":[87],"dataset.":[88],"To":[89],"address":[90],"this":[91],"problem,":[92],"we":[93],"propose":[94,129],"hybrid":[96,170],"composition":[98],"leverages":[100],"most":[102],"selective":[103,125],"portion":[104],"each":[106],"scheme.":[108],"Sets":[109],"with":[110,122,155,175,189],"an":[111],"unselective":[112],"primary":[113],"detected,":[116],"and":[117,135,144,163],"signatures":[119],"replaced":[121],"more":[124],"secondary":[126],"signature.":[127],"generic":[131],"framework":[132,154],"called":[133],"TwoL":[134,179],"cost":[137],"model":[138],"to":[139],"balance":[140],"computational":[142],"overhead":[143],"selectivity":[146],"schemes.":[150],"implement":[152],"our":[153],"complementary":[157],"schemes":[159],"Jaccard":[161],"Hamming":[164],"distance,":[165],"resulting":[166],"effective":[168],"two-level":[169],"indexes":[171],"datasets":[174,191],"diverse":[176],"efficiently.":[178],"consistently":[180],"outperforms":[181],"state-of-the-art":[182],"joins":[185],"benchmark":[188],"13":[190],"cover":[193],"wide":[195],"range":[196],"data":[198],"characteristics.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
