{"id":"https://openalex.org/W4413943470","doi":"https://doi.org/10.14778/3742728.3742742","title":"Deduplicated Sampling On-Demand","display_name":"Deduplicated Sampling On-Demand","publication_year":2025,"publication_date":"2025-04-01","ids":{"openalex":"https://openalex.org/W4413943470","doi":"https://doi.org/10.14778/3742728.3742742"},"language":"en","primary_location":{"id":"doi:10.14778/3742728.3742742","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3742728.3742742","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/11380/1385869","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061991280","display_name":"Luca Zecchini","orcid":"https://orcid.org/0000-0002-4856-0838"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Luca Zecchini","raw_affiliation_strings":["BIFOLD &amp; TU Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD &amp; TU Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073440687","display_name":"Vasilis Efthymiou","orcid":"https://orcid.org/0000-0002-0683-030X"},"institutions":[{"id":"https://openalex.org/I32762134","display_name":"Harokopio University of Athens","ror":"https://ror.org/02k5gp281","country_code":"GR","type":"education","lineage":["https://openalex.org/I32762134"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vasilis Efthymiou","raw_affiliation_strings":["Harokopio University, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Harokopio University, Athens, Greece","institution_ids":["https://openalex.org/I32762134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053028480","display_name":"Felix Naumann","orcid":"https://orcid.org/0000-0002-4483-1389"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Felix Naumann","raw_affiliation_strings":["Hasso Plattner Institute, Potsdam, Germany"],"affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, Potsdam, Germany","institution_ids":["https://openalex.org/I143288331"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047630333","display_name":"Giovanni Simonini","orcid":"https://orcid.org/0000-0002-3466-509X"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giovanni Simonini","raw_affiliation_strings":["University of Modena and Reggio Emilia, Italy"],"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia, Italy","institution_ids":["https://openalex.org/I122346577"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5061991280"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":1.648,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89222359,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"18","issue":"8","first_page":"2482","last_page":"2495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11539","display_name":"Survey Methodology and Nonresponse","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9796000123023987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.47974884510040283},{"id":"https://openalex.org/keywords/environmental-science","display_name":"Environmental science","score":0.36920180916786194},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2708776593208313},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06427925825119019}],"concepts":[{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.47974884510040283},{"id":"https://openalex.org/C39432304","wikidata":"https://www.wikidata.org/wiki/Q188847","display_name":"Environmental science","level":0,"score":0.36920180916786194},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2708776593208313},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06427925825119019},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/3742728.3742742","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3742728.3742742","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:iris.unimore.it:11380/1385869","is_oa":true,"landing_page_url":"https://hdl.handle.net/11380/1385869","pdf_url":null,"source":{"id":"https://openalex.org/S4306400718","display_name":"IRIS UNIMORE (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:iris.unimore.it:11380/1385869","is_oa":true,"landing_page_url":"https://hdl.handle.net/11380/1385869","pdf_url":null,"source":{"id":"https://openalex.org/S4306400718","display_name":"IRIS UNIMORE (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W1975184797","https://openalex.org/W2039789840","https://openalex.org/W2074876483","https://openalex.org/W2078727008","https://openalex.org/W2079649893","https://openalex.org/W2099637074","https://openalex.org/W2100960835","https://openalex.org/W2107966677","https://openalex.org/W2108087318","https://openalex.org/W2113411758","https://openalex.org/W2134592632","https://openalex.org/W2149731317","https://openalex.org/W2168440643","https://openalex.org/W2220488081","https://openalex.org/W2237063244","https://openalex.org/W2295240344","https://openalex.org/W2498260651","https://openalex.org/W2542998387","https://openalex.org/W2548122763","https://openalex.org/W2612526608","https://openalex.org/W2704480242","https://openalex.org/W2748156246","https://openalex.org/W2750620035","https://openalex.org/W2750756391","https://openalex.org/W2775696413","https://openalex.org/W2795151173","https://openalex.org/W2798649495","https://openalex.org/W2885659818","https://openalex.org/W2933723051","https://openalex.org/W2962856906","https://openalex.org/W2963341956","https://openalex.org/W2963809228","https://openalex.org/W3005822199","https://openalex.org/W3008440336","https://openalex.org/W3011807731","https://openalex.org/W3013103751","https://openalex.org/W3014295153","https://openalex.org/W3014705052","https://openalex.org/W3092541244","https://openalex.org/W3092962901","https://openalex.org/W3098360431","https://openalex.org/W3098444442","https://openalex.org/W3102092462","https://openalex.org/W3105771849","https://openalex.org/W3123375411","https://openalex.org/W3136824354","https://openalex.org/W3155638005","https://openalex.org/W3173173856","https://openalex.org/W3181414820","https://openalex.org/W3197182341","https://openalex.org/W3197468999","https://openalex.org/W3205761523","https://openalex.org/W3209119957","https://openalex.org/W4210736086","https://openalex.org/W4226367749","https://openalex.org/W4229641819","https://openalex.org/W4240301789","https://openalex.org/W4242744113","https://openalex.org/W4283312893","https://openalex.org/W4283314192","https://openalex.org/W4300456194","https://openalex.org/W4300807212","https://openalex.org/W4302802341","https://openalex.org/W4321448364","https://openalex.org/W4327743697","https://openalex.org/W4366729173","https://openalex.org/W4379280607","https://openalex.org/W4385762475","https://openalex.org/W4386125399","https://openalex.org/W4386298264","https://openalex.org/W4386768637","https://openalex.org/W4389315089","https://openalex.org/W4389539805","https://openalex.org/W4390723615","https://openalex.org/W4391095066","https://openalex.org/W4397029689","https://openalex.org/W4399567256","https://openalex.org/W4405315676","https://openalex.org/W4407355477","https://openalex.org/W4411487126","https://openalex.org/W6892190348","https://openalex.org/W6892236312","https://openalex.org/W6892257800","https://openalex.org/W6910715999"],"related_works":["https://openalex.org/W2038693912","https://openalex.org/W1991602789","https://openalex.org/W1582396021","https://openalex.org/W2807783496","https://openalex.org/W2051452952","https://openalex.org/W2057866436","https://openalex.org/W3007404728","https://openalex.org/W1977155515","https://openalex.org/W2776417242","https://openalex.org/W1551441281"],"abstract_inverted_index":{"Data":[0],"practitioners":[1],"often":[2],"sample":[3,83,138,182],"their":[4,11],"datasets":[5,121,159],"to":[6,30,40,89,97,134,148,177,188],"produce":[7,31,135,179],"representative":[8],"subsets":[9,32],"for":[10,45,119],"downstream":[12],"tasks.":[13],"When":[14],"entities":[15,146],"in":[16,150],"a":[17,35,42,47,81,85,90,131,136,180,184,189],"dataset":[18,87,105,186],"can":[19,67],"be":[20,116],"partitioned":[21],"into":[22],"multiple":[23,58,158],"groups,":[24],"stratified":[25],"sampling":[26,75,99,109,126],"is":[27,130],"commonly":[28],"used":[29],"that":[33,66,113,151,164],"match":[34],"target":[36,91,190],"group":[37,92,191],"distribution,":[38],"e.g.,":[39],"select":[41],"balanced":[43],"subset":[44],"training":[46],"machine":[48],"learning":[49],"model.":[50],"However,":[51,112],"real-world":[52,63],"data":[53,171],"frequently":[54],"contains":[55],"duplicates":[56],"\u2014":[57,65],"representations":[59],"of":[60,79,84,183],"the":[61,77,103,141],"same":[62],"entity":[64],"bias":[68],"sampling,":[69],"necessitating":[70],"deduplication.":[71],"We":[72],"define":[73],"deduplicated":[74,98],"as":[76],"task":[78],"producing":[80],"clean":[82,137,181],"dirty":[86,185],"according":[88,187],"distribution.":[93,192],"The":[94],"na\u00efve":[95],"approach":[96,114,133],"would":[100],"first":[101],"deduplicate":[102],"entire":[104],"upfront,":[106],"then":[107],"perform":[108],"ex":[110],"post.":[111],"might":[115],"prohibitively":[117],"expensive":[118],"large":[120],"and":[122],"time/resource":[123],"constraints.":[124],"Deduplicated":[125],"ondemand":[127],"with":[128,173],"RadlER":[129,165],"novel":[132],"by":[139],"focusing":[140],"cleaning":[142],"effort":[143],"only":[144],"on":[145,157],"required":[147],"appear":[149],"sample.":[152],"Our":[153],"experimental":[154],"evaluation,":[155],"performed":[156],"from":[160],"different":[161],"domains,":[162],"demonstrates":[163],"consistently":[166],"outperforms":[167],"baseline":[168],"approaches,":[169],"providing":[170],"scientists":[172],"an":[174],"efficient":[175],"solution":[176],"quickly":[178]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
