{"id":"https://openalex.org/W2438459259","doi":"https://doi.org/10.1109/icde.2016.7498366","title":"Dark Data: Are we solving the right problems?","display_name":"Dark Data: Are we solving the right problems?","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2438459259","doi":"https://doi.org/10.1109/icde.2016.7498366","mag":"2438459259"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2016.7498366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039133265","display_name":"Michael Cafarella","orcid":"https://orcid.org/0000-0001-6122-0590"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Cafarella","raw_affiliation_strings":["University of Michigan, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Michigan, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000141065","display_name":"Ihab F. Ilyas","orcid":"https://orcid.org/0000-0001-9052-9714"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ihab F. Ilyas","raw_affiliation_strings":["University of Waterloo, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Waterloo, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038422812","display_name":"Marcel Kornacker","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118958","display_name":"Cloudera (United States)","ror":"https://ror.org/02bwhra22","country_code":"US","type":"company","lineage":["https://openalex.org/I4210118958"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcel Kornacker","raw_affiliation_strings":["Cloudera, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cloudera, USA","institution_ids":["https://openalex.org/I4210118958"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034086130","display_name":"Tim Kraska","orcid":"https://orcid.org/0009-0003-2414-2759"},"institutions":[{"id":"https://openalex.org/I175594653","display_name":"John Brown University","ror":"https://ror.org/02ct41q97","country_code":"US","type":"education","lineage":["https://openalex.org/I175594653"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tim Kraska","raw_affiliation_strings":["Brown University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brown University, USA","institution_ids":["https://openalex.org/I175594653"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103852640","display_name":"Christopher R\u00e9","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Re","raw_affiliation_strings":["Stanford University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8653,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.87366596,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1444","last_page":"1445"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9563999772071838,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6381841897964478},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6332412958145142},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.6312582492828369},{"id":"https://openalex.org/keywords/possession","display_name":"Possession (linguistics)","score":0.6105802059173584},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.518913984298706},{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.4829932451248169},{"id":"https://openalex.org/keywords/enterprise-data-management","display_name":"Enterprise data management","score":0.47024980187416077},{"id":"https://openalex.org/keywords/data-migration","display_name":"Data migration","score":0.4615367352962494},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4310421943664551},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4287911057472229},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.30727946758270264},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2958247661590576},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.22615131735801697},{"id":"https://openalex.org/keywords/enterprise-information-system","display_name":"Enterprise information system","score":0.15666037797927856},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.10116198658943176}],"concepts":[{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6381841897964478},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6332412958145142},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.6312582492828369},{"id":"https://openalex.org/C2780193096","wikidata":"https://www.wikidata.org/wiki/Q3543662","display_name":"Possession (linguistics)","level":2,"score":0.6105802059173584},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.518913984298706},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.4829932451248169},{"id":"https://openalex.org/C136227091","wikidata":"https://www.wikidata.org/wiki/Q5380376","display_name":"Enterprise data management","level":3,"score":0.47024980187416077},{"id":"https://openalex.org/C146152329","wikidata":"https://www.wikidata.org/wiki/Q1932543","display_name":"Data migration","level":2,"score":0.4615367352962494},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4310421943664551},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4287911057472229},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.30727946758270264},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2958247661590576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.22615131735801697},{"id":"https://openalex.org/C27295321","wikidata":"https://www.wikidata.org/wiki/Q831795","display_name":"Enterprise information system","level":2,"score":0.15666037797927856},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.10116198658943176},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icde.2016.7498366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5699999928474426,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2382249868","https://openalex.org/W3198250195","https://openalex.org/W2965164214","https://openalex.org/W2140518328","https://openalex.org/W4367303911","https://openalex.org/W4393006833","https://openalex.org/W2389897747","https://openalex.org/W1587608488","https://openalex.org/W2115553997","https://openalex.org/W3154441264"],"abstract_inverted_index":{"With":[0,44],"the":[1,5,23,45,86,115,152],"increasing":[2],"urge":[3],"of":[4,40,48,85,88,124,137],"enterprises":[6,58,108],"to":[7,19,29,34,51,101,117],"ingest":[8],"as":[9,12,20],"much":[10],"data":[11,63,92,100,105,127,154],"they":[13],"can":[14],"in":[15,111,157],"what's":[16],"commonly":[17],"referred":[18],"\u201cData":[21],"Lakes\u201d,":[22],"new":[24],"environment":[25],"presents":[26],"serious":[27],"challenges":[28,156],"traditional":[30,57],"ETL":[31],"models":[32],"and":[33,145],"building":[35],"analytic":[36],"layers":[37],"on":[38,80,151],"top":[39],"well-understood":[41],"global":[42],"schema.":[43],"recent":[46],"development":[47],"multiple":[49],"technologies":[50],"support":[52],"this":[53,126],"\u201cload-first\u201d":[54],"paradigm,":[55],"even":[56,68],"have":[59,67,110,148],"fairly":[60],"large":[61],"HDFS-based":[62],"lakes":[64],"now.":[65],"They":[66],"had":[69],"them":[70],"long":[71],"enough":[72],"that":[73],"their":[74,90,112],"first":[75],"generation":[76],"IT":[77],"projects":[78],"delivered":[79],"some,":[81],"but":[82],"not":[83],"all,":[84],"promise":[87],"integrating":[89],"enterprise's":[91],"assets.":[93],"In":[94,129],"short,":[95],"we":[96,147],"moved":[97],"from":[98],"no":[99],"Dark":[102,104,143],"data.":[103],"is":[106,141],"what":[107,125],"might":[109,133],"possession,":[113],"without":[114],"ability":[116],"access":[118],"it":[119],"or":[120],"with":[121,159],"limited":[122],"awareness":[123],"represents.":[128],"particular,":[130],"business-critical":[131],"information":[132],"still":[134],"remain":[135],"out":[136],"reach.":[138],"This":[139],"panel":[140],"about":[142],"Data":[144],"whether":[146],"been":[149],"focusing":[150],"right":[153],"management":[155],"dealing":[158],"it.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
