{"id":"https://openalex.org/W7138458924","doi":"https://doi.org/10.48550/arxiv.2603.14994","title":"DP-S4S: Accurate and Scalable Select-Join-Aggregate Query Processing with User-Level Differential Privacy","display_name":"DP-S4S: Accurate and Scalable Select-Join-Aggregate Query Processing with User-Level Differential Privacy","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138458924","doi":"https://doi.org/10.48550/arxiv.2603.14994"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.14994","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14994","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.14994","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129739516","display_name":"Yuan Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qiu, Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010903591","display_name":"Xiaokui Xiao","orcid":"https://orcid.org/0000-0003-0914-4580"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Xiaokui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129738241","display_name":"Yin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5129739516"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.7242000102996826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.7242000102996826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.13809999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.05660000070929527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/differential-privacy","display_name":"Differential privacy","score":0.7907000184059143},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7573000192642212},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6489999890327454},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5891000032424927},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.4065000116825104},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.40290001034736633},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.39399999380111694},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.3409000039100647}],"concepts":[{"id":"https://openalex.org/C23130292","wikidata":"https://www.wikidata.org/wiki/Q5275358","display_name":"Differential privacy","level":2,"score":0.7907000184059143},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7735999822616577},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7573000192642212},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6489999890327454},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5891000032424927},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45239999890327454},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.4065000116825104},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.39399999380111694},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3652999997138977},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32580000162124634},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3231000006198883},{"id":"https://openalex.org/C24028149","wikidata":"https://www.wikidata.org/wiki/Q7094056","display_name":"Online aggregation","level":5,"score":0.3172000050544739},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.14994","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14994","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.14994","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14994","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Answering":[0],"Select-Join-Aggregate":[1],"queries":[2],"with":[3,9,36,123,163,178,242],"DP":[4,21,196],"is":[5,70,92,100,149],"a":[6,75,93,113,151,179,211],"fundamental":[7],"problem":[8],"important":[10],"applications":[11],"in":[12,105,125],"various":[13],"domains.":[14],"The":[15],"current":[16],"SOTA":[17],"methods":[18],"ensure":[19],"user-level":[20,270],"(i.e.,":[22],"the":[23,27,44,109,138,184,194,216,230],"adversary":[24],"cannot":[25],"infer":[26],"presence":[28],"or":[29],"absence":[30],"of":[31,225],"any":[32],"given":[33,114],"individual":[34],"user":[35],"high":[37,181,274],"confidence)":[38],"and":[39,81,127,158,227,250],"achieve":[40,68,108],"instance-optimal":[41],"accuracy":[42,111],"on":[43,256,266],"query":[45],"results.":[46],"However,":[47,84],"these":[48],"solutions":[49,119,165],"involve":[50],"solving":[51],"expensive":[52],"optimization":[53],"programs,":[54],"which":[55,73,238],"may":[56],"incur":[57],"prohibitive":[58],"computational":[59,82,115],"overhead":[60],"for":[61,95,208,233],"large":[62,267],"databases.":[63],"One":[64],"promising":[65],"direction":[66],"to":[67,87,103,107,145,166],"scalability":[69],"through":[71],"sampling,":[72],"provides":[74],"tunable":[76],"trade-off":[77],"between":[78],"result":[79,275],"utility":[80],"costs.":[83],"applying":[85],"sampling":[86,124,144,161,221],"differentially":[88],"private":[89,146],"SJA":[90,147,234,252,264],"processing":[91,148,235,265],"challenge":[94],"two":[96],"reasons.":[97],"First,":[98],"it":[99],"unclear":[101],"what":[102],"sample,":[104],"order":[106],"best":[110],"within":[112],"budget.":[116],"Second,":[117],"prior":[118],"were":[120],"not":[121,133],"designed":[122],"mind,":[126],"their":[128],"mathematical":[129,231],"tool":[130],"chains":[131],"are":[132,173],"sampling-friendly.":[134],"To":[135],"our":[136],"knowledge,":[137],"only":[139],"known":[140],"solution":[141],"that":[142,154,171,214,260],"applies":[143],"S&amp;E,":[150],"recent":[152],"proposal":[153],"(i)":[155,220],"samples":[156],"users":[157],"(ii)":[159,228],"combines":[160],"directly":[162],"existing":[164],"enforce":[167],"DP.":[168],"We":[169],"show":[170],"both":[172,248],"suboptimal":[174],"designs;":[175],"consequently,":[176],"even":[177],"relatively":[180],"sample":[182],"rate,":[183],"error":[185],"incurred":[186],"by":[187,201,219],"S&amp;E":[188],"can":[189,246],"be":[190],"10x":[191],"higher":[192],"than":[193],"underlying":[195],"mechanism":[197,213],"without":[198],"sampling.":[199,243],"Motivated":[200],"this,":[202],"we":[203],"propose":[204],"Differentially":[205],"Private":[206],"Sampling":[207],"Scale":[209],"(DP-S4S),":[210],"novel":[212],"addresses":[215],"above":[217],"challenges":[218],"aggregation":[222],"units":[223],"instead":[224],"users,":[226],"laying":[229],"foundation":[232],"under":[236,269],"RDP,":[237],"composes":[239],"more":[240],"easily":[241],"Further,":[244],"DP-S4S":[245,261],"answer":[247],"scalar":[249],"vector":[251],"queries.":[253],"Extensive":[254],"experiments":[255],"real":[257],"data":[258],"demonstrate":[259],"enables":[262],"scalable":[263],"datasets":[268],"DP,":[271],"while":[272],"maintaining":[273],"utility.":[276]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
