{"id":"https://openalex.org/W2621544383","doi":"https://doi.org/10.1145/3085504.3085524","title":"A Unified Correlation-based Approach to Sampling Over Joins","display_name":"A Unified Correlation-based Approach to Sampling Over Joins","publication_year":2017,"publication_date":"2017-06-05","ids":{"openalex":"https://openalex.org/W2621544383","doi":"https://doi.org/10.1145/3085504.3085524","mag":"2621544383"},"language":"en","primary_location":{"id":"doi:10.1145/3085504.3085524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3085504.3085524","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3085524&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=3085524&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026699566","display_name":"Niranjan Kamat","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Niranjan Kamat","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001906560","display_name":"Arnab Nandi","orcid":"https://orcid.org/0000-0002-4138-603X"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arnab Nandi","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5026699566"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":1.1212,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7866229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.8976919651031494},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.8390249013900757},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7340141534805298},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6693753600120544},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.6286870241165161},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6200354099273682},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.49917030334472656},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.446293443441391},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.4428946375846863},{"id":"https://openalex.org/keywords/simple-random-sample","display_name":"Simple random sample","score":0.44285979866981506},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3354184627532959},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3332276940345764},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.271194189786911},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19398117065429688},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09562745690345764}],"concepts":[{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.8976919651031494},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.8390249013900757},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7340141534805298},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6693753600120544},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.6286870241165161},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6200354099273682},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49917030334472656},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.446293443441391},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.4428946375846863},{"id":"https://openalex.org/C20353970","wikidata":"https://www.wikidata.org/wiki/Q1056998","display_name":"Simple random sample","level":3,"score":0.44285979866981506},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3354184627532959},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3332276940345764},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.271194189786911},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19398117065429688},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09562745690345764},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3085504.3085524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3085504.3085524","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3085524&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3085504.3085524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3085504.3085524","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3085524&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1744848353","display_name":null,"funder_award_id":"1422977, 1453582, 1527779","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3058677415","display_name":null,"funder_award_id":"1422977","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6614414788","display_name":null,"funder_award_id":"1453582","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2621544383.pdf","grobid_xml":"https://content.openalex.org/works/W2621544383.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W135195598","https://openalex.org/W153182319","https://openalex.org/W1586832667","https://openalex.org/W1589924190","https://openalex.org/W1594122454","https://openalex.org/W1661188838","https://openalex.org/W1964857063","https://openalex.org/W1966563289","https://openalex.org/W1976338956","https://openalex.org/W1995034419","https://openalex.org/W2020147322","https://openalex.org/W2020584928","https://openalex.org/W2034856714","https://openalex.org/W2043099794","https://openalex.org/W2045938805","https://openalex.org/W2050277572","https://openalex.org/W2063546264","https://openalex.org/W2064777887","https://openalex.org/W2068634907","https://openalex.org/W2071989194","https://openalex.org/W2072145795","https://openalex.org/W2073479529","https://openalex.org/W2079499902","https://openalex.org/W2090403603","https://openalex.org/W2091341267","https://openalex.org/W2097880677","https://openalex.org/W2103212156","https://openalex.org/W2107422095","https://openalex.org/W2110953678","https://openalex.org/W2119885577","https://openalex.org/W2132808937","https://openalex.org/W2136014429","https://openalex.org/W2144200996","https://openalex.org/W2153406069","https://openalex.org/W2153914251","https://openalex.org/W2165990006","https://openalex.org/W2235478132","https://openalex.org/W2242120942","https://openalex.org/W2243803726","https://openalex.org/W2284530523","https://openalex.org/W2296677182","https://openalex.org/W2421547754","https://openalex.org/W2429510775","https://openalex.org/W2440979603","https://openalex.org/W2787688218","https://openalex.org/W3027465205","https://openalex.org/W4229903866","https://openalex.org/W4237172715","https://openalex.org/W4241185933","https://openalex.org/W4249924957"],"related_works":["https://openalex.org/W2393491644","https://openalex.org/W4206577045","https://openalex.org/W3086237447","https://openalex.org/W650102067","https://openalex.org/W2016456293","https://openalex.org/W1550806730","https://openalex.org/W2589740103","https://openalex.org/W2172084996","https://openalex.org/W1966967794","https://openalex.org/W1501284171"],"abstract_inverted_index":{"Supporting":[0],"sampling":[1],"in":[2,11,42,97,127],"the":[3,20,43,48,60,71,89,98,128,141],"presence":[4],"of":[5,70,88],"joins":[6],"is":[7,15,104,121,134],"an":[8],"important":[9],"problem":[10],"data":[12,53,151],"analysis,":[13],"but":[14],"inherently":[16],"challenging":[17],"due":[18],"to":[19,22,113,140,144],"need":[21],"avoid":[23],"correlation":[24,85],"between":[25],"output":[26],"tuples.":[27],"Current":[28],"solutions":[29],"provide":[30,92,107,138],"either":[31],"correlated":[32,64,102,132],"or":[33,51],"non-correlated":[34,44],"samples.":[35],"Sampling":[36],"might":[37,55],"not":[38,67,135],"always":[39],"be":[40,56,68],"feasible":[41],"sampling-based":[45],"approaches":[46],"--":[47],"sample":[49,65,84,103,114,133],"size":[50,54],"intermediate":[52,150],"exceedingly":[57],"large.":[58],"On":[59],"other":[61],"hand,":[62],"a":[63,76,101,131],"may":[66],"representative":[69],"join.":[72],"This":[73],"paper":[74],"presents":[75],"unified":[77],"strategy":[78],"towards":[79],"join":[80,111,120],"sampling,":[81],"while":[82],"considering":[83],"every":[86],"step":[87],"way.":[90],"We":[91],"two":[93],"key":[94],"contributions.":[95],"First,":[96],"case":[99,129],"where":[100,130],"acceptable,":[105,136],"we":[106,137],"techniques,":[108],"for":[109],"all":[110],"types,":[112],"base":[115],"relations":[116],"so":[117],"that":[118],"their":[119,146],"as":[122,124],"random":[123],"possible.":[125],"Second,":[126],"enhancements":[139],"state-of-the-art":[142],"algorithms":[143],"reduce":[145],"execution":[147],"time":[148],"and":[149],"size.":[152]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
