{"id":"https://openalex.org/W2970992672","doi":"https://doi.org/10.14778/3352063.3352116","title":"Data lake management","display_name":"Data lake management","publication_year":2019,"publication_date":"2019-08-01","ids":{"openalex":"https://openalex.org/W2970992672","doi":"https://doi.org/10.14778/3352063.3352116","mag":"2970992672"},"language":"en","primary_location":{"id":"doi:10.14778/3352063.3352116","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3352063.3352116","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012572863","display_name":"Fatemeh Nargesian","orcid":"https://orcid.org/0000-0002-4710-8719"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Fatemeh Nargesian","raw_affiliation_strings":["University of Toronto"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013934423","display_name":"Erkang Zhu","orcid":"https://orcid.org/0009-0000-3326-1790"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Erkang Zhu","raw_affiliation_strings":["University of Toronto"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022619313","display_name":"Ren\u00e9e J. Miller","orcid":"https://orcid.org/0000-0002-1484-4787"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Ren\u00e9e J. Miller","raw_affiliation_strings":["Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108778575","display_name":"Ken Q. Pu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119257","display_name":"University of Information Technology and Communications","ror":"https://ror.org/028h0pd91","country_code":"IQ","type":"education","lineage":["https://openalex.org/I4210119257"]}],"countries":["IQ"],"is_corresponding":false,"raw_author_name":"Ken Q. Pu","raw_affiliation_strings":["UOIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UOIT","institution_ids":["https://openalex.org/I4210119257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079836420","display_name":"Patricia C. Arocena","orcid":null},"institutions":[{"id":"https://openalex.org/I2802549117","display_name":"TD Bank Group","ror":"https://ror.org/04jjb9549","country_code":"CA","type":"other","lineage":["https://openalex.org/I2802549117"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Patricia C. Arocena","raw_affiliation_strings":["TD Bank Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TD Bank Group","institution_ids":["https://openalex.org/I2802549117"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":16.4648,"has_fulltext":false,"cited_by_count":236,"citation_normalized_percentile":{"value":0.99356042,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"12","issue":"12","first_page":"1986","last_page":"1989"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8346608877182007},{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.737026572227478},{"id":"https://openalex.org/keywords/metadata-management","display_name":"Metadata management","score":0.6476186513900757},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6464499235153198},{"id":"https://openalex.org/keywords/data-management-plan","display_name":"Data management plan","score":0.644713282585144},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6129046678543091},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.5418970584869385},{"id":"https://openalex.org/keywords/software-versioning","display_name":"Software versioning","score":0.5320920348167419},{"id":"https://openalex.org/keywords/data-mapping","display_name":"Data mapping","score":0.5081639289855957},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.5045355558395386},{"id":"https://openalex.org/keywords/data-element","display_name":"Data element","score":0.4531347155570984},{"id":"https://openalex.org/keywords/research-data","display_name":"Research data","score":0.416089802980423},{"id":"https://openalex.org/keywords/data-virtualization","display_name":"Data virtualization","score":0.4121890664100647},{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.3370245099067688},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3257668614387512},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.30520468950271606},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.08838492631912231}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8346608877182007},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.737026572227478},{"id":"https://openalex.org/C2779489174","wikidata":"https://www.wikidata.org/wiki/Q6822246","display_name":"Metadata management","level":3,"score":0.6476186513900757},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6464499235153198},{"id":"https://openalex.org/C158746014","wikidata":"https://www.wikidata.org/wiki/Q17085509","display_name":"Data management plan","level":3,"score":0.644713282585144},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6129046678543091},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.5418970584869385},{"id":"https://openalex.org/C198140048","wikidata":"https://www.wikidata.org/wiki/Q10859422","display_name":"Software versioning","level":3,"score":0.5320920348167419},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.5081639289855957},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.5045355558395386},{"id":"https://openalex.org/C30872290","wikidata":"https://www.wikidata.org/wiki/Q1172389","display_name":"Data element","level":3,"score":0.4531347155570984},{"id":"https://openalex.org/C3020038283","wikidata":"https://www.wikidata.org/wiki/Q42848","display_name":"Research data","level":3,"score":0.416089802980423},{"id":"https://openalex.org/C80344994","wikidata":"https://www.wikidata.org/wiki/Q5227369","display_name":"Data virtualization","level":4,"score":0.4121890664100647},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.3370245099067688},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3257668614387512},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.30520468950271606},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.08838492631912231},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.0},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3352063.3352116","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3352063.3352116","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W141724566","https://openalex.org/W297231882","https://openalex.org/W1014725521","https://openalex.org/W1679901020","https://openalex.org/W1969621019","https://openalex.org/W1976022204","https://openalex.org/W1976957990","https://openalex.org/W1996505782","https://openalex.org/W2054974985","https://openalex.org/W2057513751","https://openalex.org/W2063103859","https://openalex.org/W2066806792","https://openalex.org/W2096454106","https://openalex.org/W2102729564","https://openalex.org/W2108223890","https://openalex.org/W2117200425","https://openalex.org/W2130204178","https://openalex.org/W2140116426","https://openalex.org/W2162520370","https://openalex.org/W2171262729","https://openalex.org/W2216189112","https://openalex.org/W2290320465","https://openalex.org/W2341748398","https://openalex.org/W2424304400","https://openalex.org/W2425316268","https://openalex.org/W2438792749","https://openalex.org/W2440094130","https://openalex.org/W2584580687","https://openalex.org/W2585438896","https://openalex.org/W2610492020","https://openalex.org/W2750856415","https://openalex.org/W2750991217","https://openalex.org/W2767419415","https://openalex.org/W2795089200","https://openalex.org/W2795302121","https://openalex.org/W2798664493","https://openalex.org/W2805350385","https://openalex.org/W2807396045","https://openalex.org/W2810954846","https://openalex.org/W2889003264","https://openalex.org/W2904132383","https://openalex.org/W2906841437","https://openalex.org/W2948163032","https://openalex.org/W2963174348","https://openalex.org/W4300456194","https://openalex.org/W6732710093","https://openalex.org/W6733170484","https://openalex.org/W6749511090"],"related_works":["https://openalex.org/W2365178252","https://openalex.org/W3204280384","https://openalex.org/W3199469221","https://openalex.org/W2374161210","https://openalex.org/W2360527443","https://openalex.org/W4387914223","https://openalex.org/W2069448194","https://openalex.org/W3199626854","https://openalex.org/W2554009605","https://openalex.org/W1500074521"],"abstract_inverted_index":{"The":[0],"ubiquity":[1],"of":[2],"data":[3,11,22,25,30,50,52,54,56],"lakes":[4,31],"has":[5],"created":[6],"fascinating":[7],"new":[8,34],"challenges":[9],"for":[10,24,46],"management":[12,23],"research.":[13],"In":[14],"this":[15],"tutorial,":[16],"we":[17],"review":[18],"the":[19,44],"state-of-the-art":[20],"in":[21],"lakes.":[26],"We":[27],"consider":[28],"how":[29,40],"are":[32,42],"introducing":[33],"problems":[35,48],"including":[36,49],"dataset":[37],"discovery":[38],"and":[39,58],"they":[41],"changing":[43],"requirements":[45],"classic":[47],"extraction,":[51],"cleaning,":[53],"integration,":[55],"versioning,":[57],"metadata":[59],"management.":[60]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":43},{"year":2024,"cited_by_count":55},{"year":2023,"cited_by_count":47},{"year":2022,"cited_by_count":26},{"year":2021,"cited_by_count":26},{"year":2020,"cited_by_count":27}],"updated_date":"2026-06-14T07:44:22.658603","created_date":"2025-10-10T00:00:00"}
