{"id":"https://openalex.org/W2953288349","doi":"https://doi.org/10.1145/3167970","title":"Estimating the Impact of Unknown Unknowns on Aggregate Query Results","display_name":"Estimating the Impact of Unknown Unknowns on Aggregate Query Results","publication_year":2018,"publication_date":"2018-03-06","ids":{"openalex":"https://openalex.org/W2953288349","doi":"https://doi.org/10.1145/3167970","mag":"2953288349"},"language":"en","primary_location":{"id":"doi:10.1145/3167970","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3167970","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://pure.au.dk/portal/en/publications/b88da962-29dc-41a9-b083-6daf9f611463","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087758743","display_name":"Yeounoh Chung","orcid":"https://orcid.org/0000-0002-6535-9001"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yeounoh Chung","raw_affiliation_strings":["Brown University, Providence, RI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014453263","display_name":"Michael Lind Mortensen","orcid":null},"institutions":[{"id":"https://openalex.org/I204337017","display_name":"Aarhus University","ror":"https://ror.org/01aj84f44","country_code":"DK","type":"education","lineage":["https://openalex.org/I204337017"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Michael Lind Mortensen","raw_affiliation_strings":["Aarhus University, Aarhus C, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aarhus University, Aarhus C, Denmark","institution_ids":["https://openalex.org/I204337017"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073504200","display_name":"Carsten Binnig","orcid":"https://orcid.org/0000-0002-2744-7836"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carsten Binnig","raw_affiliation_strings":["Brown University, Providence, RI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034086130","display_name":"Tim Kraska","orcid":"https://orcid.org/0009-0003-2414-2759"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tim Kraska","raw_affiliation_strings":["Brown University, Providence, RI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087758743"],"corresponding_institution_ids":["https://openalex.org/I27804330"],"apc_list":null,"apc_paid":null,"fwci":0.4403,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.71988499,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"43","issue":"1","first_page":"1","last_page":"37"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8533272743225098},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.825994610786438},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6236701011657715},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.596133291721344},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.5405347347259521},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.5197810530662537},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.47630131244659424},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4579322636127472},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.4451180100440979},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.440640389919281},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16707581281661987},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.1442672312259674},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12005829811096191},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11985135078430176},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08621111512184143}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8533272743225098},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.825994610786438},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6236701011657715},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.596133291721344},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.5405347347259521},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.5197810530662537},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.47630131244659424},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4579322636127472},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4451180100440979},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.440640389919281},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16707581281661987},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.1442672312259674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12005829811096191},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11985135078430176},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08621111512184143},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3167970","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3167970","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/b88da962-29dc-41a9-b083-6daf9f611463","is_oa":true,"landing_page_url":"https://pure.au.dk/portal/en/publications/b88da962-29dc-41a9-b083-6daf9f611463","pdf_url":null,"source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Chung, Y, Mortensen, M L, Binnig, C & Kraska, T 2018, 'Estimating the impact of unknown unknowns on aggregate qery results', ACM Transactions on Database Systems, vol. 43, no. 1, 3. https://doi.org/10.1145/3167970","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:publications/b88da962-29dc-41a9-b083-6daf9f611463","is_oa":true,"landing_page_url":"http://www.scopus.com/inward/record.url?scp=85043760184&partnerID=8YFLogxK","pdf_url":null,"source":{"id":"https://openalex.org/S4306400063","display_name":"Scopus (Elsevier)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Chung, Y, Mortensen, M L, Binnig, C & Kraska, T 2018, 'Estimating the impact of unknown unknowns on aggregate qery results', ACM Transactions on Database Systems, vol. 43, no. 1, 3. https://doi.org/10.1145/3167970","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:openaire/b88da962-29dc-41a9-b083-6daf9f611463","is_oa":true,"landing_page_url":"https://pure.au.dk/portal/en/publications/b88da962-29dc-41a9-b083-6daf9f611463","pdf_url":null,"source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Chung, Y, Mortensen, M L, Binnig, C & Kraska, T 2018, 'Estimating the impact of unknown unknowns on aggregate qery results', ACM Transactions on Database Systems, vol. 43, no. 1, 3. https://doi.org/10.1145/3167970","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W40202779","https://openalex.org/W141764152","https://openalex.org/W189284728","https://openalex.org/W191253868","https://openalex.org/W327170683","https://openalex.org/W1044605864","https://openalex.org/W1521736627","https://openalex.org/W1524520778","https://openalex.org/W1532364676","https://openalex.org/W1551839888","https://openalex.org/W1553554986","https://openalex.org/W1558982506","https://openalex.org/W1583345983","https://openalex.org/W1585701772","https://openalex.org/W1592355944","https://openalex.org/W1601435884","https://openalex.org/W1605921839","https://openalex.org/W1610496399","https://openalex.org/W1971405816","https://openalex.org/W1987094625","https://openalex.org/W2002287579","https://openalex.org/W2006346002","https://openalex.org/W2010436309","https://openalex.org/W2012315687","https://openalex.org/W2018592576","https://openalex.org/W2020740057","https://openalex.org/W2022257958","https://openalex.org/W2026289298","https://openalex.org/W2026462408","https://openalex.org/W2032421424","https://openalex.org/W2049633694","https://openalex.org/W2056210580","https://openalex.org/W2058991275","https://openalex.org/W2068645020","https://openalex.org/W2082092506","https://openalex.org/W2083293881","https://openalex.org/W2093189534","https://openalex.org/W2100358124","https://openalex.org/W2114413252","https://openalex.org/W2120176508","https://openalex.org/W2127090196","https://openalex.org/W2129728332","https://openalex.org/W2137479650","https://openalex.org/W2138309709","https://openalex.org/W2138965424","https://openalex.org/W2152057064","https://openalex.org/W2168144930","https://openalex.org/W2312391515","https://openalex.org/W2398093188","https://openalex.org/W2510474575","https://openalex.org/W2523117481","https://openalex.org/W4233471163","https://openalex.org/W4237364687","https://openalex.org/W4250819783","https://openalex.org/W6636177537"],"related_works":["https://openalex.org/W4380150146","https://openalex.org/W3024870410","https://openalex.org/W2410652950","https://openalex.org/W4283773154","https://openalex.org/W3139174110","https://openalex.org/W4289597203","https://openalex.org/W2085630472","https://openalex.org/W3216372614","https://openalex.org/W2187819724","https://openalex.org/W2094985717"],"abstract_inverted_index":{"It":[0],"is":[1,43,82,148],"common":[2],"practice":[3],"for":[4],"data":[5,12,27,37,51,70,88,101,130,160],"scientists":[6],"to":[7,14,63,92,150],"acquire":[8],"and":[9,25,40,60,96,108],"integrate":[10],"disparate":[11],"sources":[13,89,131],"achieve":[15],"higher":[16],"quality":[17],"results.":[18],"But":[19],"even":[20],"with":[21],"a":[22,120,135],"perfectly":[23],"cleaned":[24],"merged":[26],"set,":[28],"two":[29],"fundamental":[30],"questions":[31],"remain:":[32],"(1)":[33],"Is":[34],"the":[35,44,65,68,84,94,99,115,129,143,153],"integrated":[36,159],"set":[38],"complete?":[39],"(2)":[41],"What":[42],"impact":[45,66,144],"of":[46,67,98,137,145,155],"any":[47],"unknown":[48,69,72,146],"(i.e.,":[49],"unobserved)":[50],"on":[52,75],"query":[53],"results?":[54],"In":[55],"this":[56],"work,":[57],"we":[58,117,139],"develop":[59],"analyze":[61],"techniques":[62,105],"estimate":[64,93],"(a.k.a.,":[71],"unknowns":[73,147],")":[74],"simple":[76],"aggregate":[77,156],"queries.":[78],"The":[79],"key":[80],"idea":[81],"that":[83,123,141],"overlap":[85],"between":[86],"different":[87],"enables":[90],"us":[91],"number":[95],"values":[97],"missing":[100],"items.":[102],"Our":[103],"main":[104],"are":[106,132],"parameter-free":[107],"do":[109],"not":[110],"assume":[111],"prior":[112],"knowledge":[113],"about":[114],"distribution;":[116],"also":[118],"propose":[119],"parametric":[121],"model":[122],"can":[124],"be":[125],"used":[126],"instead":[127],"when":[128],"imbalanced.":[133],"Through":[134],"series":[136],"experiments,":[138],"show":[140],"estimating":[142],"invaluable":[149],"better":[151],"assess":[152],"results":[154],"queries":[157],"over":[158],"sources.":[161]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
