{"id":"https://openalex.org/W4414266950","doi":"https://doi.org/10.14778/3750601.3750661","title":"RadlER: Deduplicated Sampling On-Demand","display_name":"RadlER: Deduplicated Sampling On-Demand","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4414266950","doi":"https://doi.org/10.14778/3750601.3750661"},"language":"en","primary_location":{"id":"doi:10.14778/3750601.3750661","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750661","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/11380/1388118","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061991280","display_name":"Luca Zecchini","orcid":"https://orcid.org/0000-0002-4856-0838"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Luca Zecchini","raw_affiliation_strings":["BIFOLD &amp; TU Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD &amp; TU Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009128577","display_name":"Ziawasch Abedjan","orcid":"https://orcid.org/0000-0002-2846-1373"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ziawasch Abedjan","raw_affiliation_strings":["BIFOLD &amp; TU Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD &amp; TU Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073440687","display_name":"Vasilis Efthymiou","orcid":"https://orcid.org/0000-0002-0683-030X"},"institutions":[{"id":"https://openalex.org/I32762134","display_name":"Harokopio University of Athens","ror":"https://ror.org/02k5gp281","country_code":"GR","type":"education","lineage":["https://openalex.org/I32762134"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vasilis Efthymiou","raw_affiliation_strings":["Harokopio University, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Harokopio University, Athens, Greece","institution_ids":["https://openalex.org/I32762134"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047630333","display_name":"Giovanni Simonini","orcid":"https://orcid.org/0000-0002-3466-509X"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giovanni Simonini","raw_affiliation_strings":["University of Modena and Reggio Emilia, Italy"],"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia, Italy","institution_ids":["https://openalex.org/I122346577"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5061991280"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27883571,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"12","first_page":"5319","last_page":"5322"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.7235000133514404},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5684000253677368},{"id":"https://openalex.org/keywords/lot-quality-assurance-sampling","display_name":"Lot quality assurance sampling","score":0.43970000743865967},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4390999972820282},{"id":"https://openalex.org/keywords/sampling-bias","display_name":"Sampling bias","score":0.42179998755455017},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.39719998836517334},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.37139999866485596}],"concepts":[{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.7235000133514404},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5989999771118164},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5684000253677368},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.525600016117096},{"id":"https://openalex.org/C195454712","wikidata":"https://www.wikidata.org/wiki/Q17133861","display_name":"Lot quality assurance sampling","level":4,"score":0.43970000743865967},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4390999972820282},{"id":"https://openalex.org/C75917345","wikidata":"https://www.wikidata.org/wiki/Q2725298","display_name":"Sampling bias","level":3,"score":0.42179998755455017},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.39719998836517334},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3416000008583069},{"id":"https://openalex.org/C75373757","wikidata":"https://www.wikidata.org/wiki/Q7410160","display_name":"Sampling design","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C167723999","wikidata":"https://www.wikidata.org/wiki/Q3773214","display_name":"Sampling distribution","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.2741999924182892}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/3750601.3750661","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750661","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:iris.unimore.it:11380/1388118","is_oa":true,"landing_page_url":"https://hdl.handle.net/11380/1388118","pdf_url":null,"source":{"id":"https://openalex.org/S4377196326","display_name":"Iris Unimore (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:iris.unimore.it:11380/1388118","is_oa":true,"landing_page_url":"https://hdl.handle.net/11380/1388118","pdf_url":null,"source":{"id":"https://openalex.org/S4377196326","display_name":"Iris Unimore (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2287926972","https://openalex.org/W2498260651","https://openalex.org/W2962856906","https://openalex.org/W2963809228","https://openalex.org/W3014295153","https://openalex.org/W3181414820","https://openalex.org/W4283312893","https://openalex.org/W4386125399","https://openalex.org/W4386768637","https://openalex.org/W4413943470"],"related_works":[],"abstract_inverted_index":{"Data":[0],"practitioners":[1,124],"often":[2],"need":[3],"to":[4,8,51,75,82,93,109,132],"sample":[5,86],"their":[6,13,126],"datasets":[7,18],"produce":[9,83],"representative":[10],"subsets":[11],"for":[12],"downstream":[14,38],"tasks.":[15,39],"Unfortunately,":[16],"real-world":[17],"frequently":[19],"contain":[20],"duplicates,":[21],"whose":[22],"presence":[23],"biases":[24],"sampling":[25,53,78],"and":[26,66,139],"impacts":[27],"the":[28,31,35,48,102,112],"quality":[29],"of":[30,37,64,87,97,137],"produced":[32],"subsets,":[33],"hence":[34],"outcome":[36],"While":[40],"deduplication":[41],"is":[42],"therefore":[43],"fundamental,":[44],"performing":[45],"it":[46],"on":[47,54,106],"entire":[49],"dataset":[50,90],"run":[52],"its":[55],"cleaned":[56],"version":[57],"might":[58],"be":[59],"prohibitively":[60],"expensive":[61],"in":[62,111,125],"terms":[63],"time":[65,138],"resources.":[67,140],"Thus,":[68],"we":[69,117],"recently":[70],"introduced":[71],"RadlER,":[72],"a":[73,84,88,94,134],"solution":[74],"perform":[76],"deduplicated":[77],"on-demand":[79],",":[80],"i.e.,":[81],"clean":[85],"dirty":[89],"incrementally,":[91],"according":[92],"target":[95],"distribution":[96],"some":[98],"subpopulations,":[99],"by":[100],"focusing":[101],"cleaning":[103],"effort":[104],"only":[105],"entities":[107],"required":[108],"appear":[110],"sample.":[113],"In":[114],"this":[115],"demonstration,":[116],"interactively":[118],"show":[119],"how":[120],"RadlER":[121],"can":[122],"support":[123],"data":[127],"science":[128],"pipelines,":[129],"allowing":[130],"them":[131],"save":[133],"relevant":[135],"amount":[136]},"counts_by_year":[],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2025-10-10T00:00:00"}
