{"id":"https://openalex.org/W3198863654","doi":"https://doi.org/10.1145/3472163.3472273","title":"Analysis-oriented Metadata for Data Lakes","display_name":"Analysis-oriented Metadata for Data Lakes","publication_year":2021,"publication_date":"2021-07-14","ids":{"openalex":"https://openalex.org/W3198863654","doi":"https://doi.org/10.1145/3472163.3472273","mag":"3198863654"},"language":"en","primary_location":{"id":"doi:10.1145/3472163.3472273","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472163.3472273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th International Database Engineering &amp; Applications Symposium","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005693626","display_name":"Yan Zhao","orcid":"https://orcid.org/0000-0002-6624-322X"},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Yan Zhao","raw_affiliation_strings":["Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084553107","display_name":"Franck Ravat","orcid":"https://orcid.org/0000-0003-4820-841X"},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Franck Ravat","raw_affiliation_strings":["Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114242578","display_name":"Julien Aligon","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Julien Aligon","raw_affiliation_strings":["Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089246855","display_name":"Chantal Soul\u00e9-Dupuy","orcid":"https://orcid.org/0000-0002-2637-724X"},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Chantal Soule-dupuy","raw_affiliation_strings":["Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113947264","display_name":"Gabriel Ferrettini","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Gabriel Ferrettini","raw_affiliation_strings":["Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Universite Toulouse (Capitole) and Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007467558","display_name":"Imen Megdiche","orcid":"https://orcid.org/0000-0002-1331-8662"},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Imen Megdiche","raw_affiliation_strings":["Institut de Recherche en Informatique de Toulouse, France"],"affiliations":[{"raw_affiliation_string":"Institut de Recherche en Informatique de Toulouse, France","institution_ids":["https://openalex.org/I4210119061"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5005693626"],"corresponding_institution_ids":["https://openalex.org/I4210119061"],"apc_list":null,"apc_paid":null,"fwci":0.87416478,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.75987416,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"194","last_page":"203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.9701744318008423},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8212477564811707},{"id":"https://openalex.org/keywords/metadata-repository","display_name":"Metadata repository","score":0.7975412607192993},{"id":"https://openalex.org/keywords/data-element","display_name":"Data element","score":0.7218149900436401},{"id":"https://openalex.org/keywords/metadata-modeling","display_name":"Metadata modeling","score":0.6867179274559021},{"id":"https://openalex.org/keywords/meta-data-services","display_name":"Meta Data Services","score":0.6530497670173645},{"id":"https://openalex.org/keywords/metadata-management","display_name":"Metadata management","score":0.6106924414634705},{"id":"https://openalex.org/keywords/data-mapping","display_name":"Data mapping","score":0.5012912750244141},{"id":"https://openalex.org/keywords/data-dictionary","display_name":"Data dictionary","score":0.48164311051368713},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.47416889667510986},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38823676109313965},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.31107401847839355},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25859832763671875}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.9701744318008423},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8212477564811707},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.7975412607192993},{"id":"https://openalex.org/C30872290","wikidata":"https://www.wikidata.org/wiki/Q1172389","display_name":"Data element","level":3,"score":0.7218149900436401},{"id":"https://openalex.org/C110326360","wikidata":"https://www.wikidata.org/wiki/Q17149476","display_name":"Metadata modeling","level":4,"score":0.6867179274559021},{"id":"https://openalex.org/C136976847","wikidata":"https://www.wikidata.org/wiki/Q6822297","display_name":"Meta Data Services","level":4,"score":0.6530497670173645},{"id":"https://openalex.org/C2779489174","wikidata":"https://www.wikidata.org/wiki/Q6822246","display_name":"Metadata management","level":3,"score":0.6106924414634705},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.5012912750244141},{"id":"https://openalex.org/C98143201","wikidata":"https://www.wikidata.org/wiki/Q1147639","display_name":"Data dictionary","level":3,"score":0.48164311051368713},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47416889667510986},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38823676109313965},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.31107401847839355},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25859832763671875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472163.3472273","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472163.3472273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th International Database Engineering &amp; Applications Symposium","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2063365570","https://openalex.org/W2159750251","https://openalex.org/W2171477123","https://openalex.org/W2372929729","https://openalex.org/W2908974255","https://openalex.org/W2911627187","https://openalex.org/W2913059114","https://openalex.org/W3028240514","https://openalex.org/W3032215537","https://openalex.org/W3122748829","https://openalex.org/W3147162460","https://openalex.org/W4213308398","https://openalex.org/W4250045841"],"related_works":["https://openalex.org/W2907247951","https://openalex.org/W3199469221","https://openalex.org/W3084772717","https://openalex.org/W2376038160","https://openalex.org/W2362378919","https://openalex.org/W2374379029","https://openalex.org/W3198863654","https://openalex.org/W1636990136","https://openalex.org/W2368830475","https://openalex.org/W2372299323"],"abstract_inverted_index":{"Data":[0],"lakes":[1],"are":[2],"supposed":[3],"to":[4,7,28,57,101,128,154],"enable":[5],"analysts":[6],"perform":[8],"more":[9],"efficient":[10],"and":[11,22,92,130,135,173],"efficacious":[12],"data":[13,19,33,63,82,120,145,149],"analysis":[14,49],"by":[15,137],"crossing":[16],"multiple":[17],"existing":[18,132],"sources,":[20],"processes":[21,134],"analyses.":[23],"However,":[24],"it":[25],"is":[26,56],"impossible":[27],"achieve":[29],"that":[30,42,65],"when":[31],"a":[32,38,143],"lake":[34,64,121],"does":[35],"not":[36],"have":[37,58],"metadata":[39,79,99,113,122,140,157],"governance":[40],"system":[41],"progressively":[43],"capitalizes":[44,66],"on":[45,67,107],"all":[46,68,98],"the":[47,87,102,148,159],"performed":[48,106],"experiments.":[50],"The":[51],"objective":[52],"of":[53,90,119],"this":[54,73],"paper":[55],"an":[59,77,117],"easily":[60,155],"accessible,":[61],"reusable":[62],"user":[69],"experiences.":[70],"To":[71,110,151],"meet":[72],"need,":[74],"we":[75,115,161],"propose":[76],"analysis-oriented":[78],"model":[80,85],"for":[81],"lakes.":[83],"This":[84,124],"includes":[86],"descriptive":[88],"information":[89],"datasets":[91,170],"their":[93],"attributes,":[94],"as":[95,97],"well":[96],"related":[100],"machine":[103,174],"learning":[104,175],"analyzes":[105],"these":[108],"datasets.":[109],"illustrate":[111],"our":[112],"solution,":[114],"implemented":[116],"application":[118,125],"management.":[123],"allows":[126],"users":[127],"find":[129],"use":[131,164],"data,":[133,168],"analyses":[136],"searching":[138],"relevant":[139],"stored":[141],"in":[142],"NoSQL":[144],"store":[146],"within":[147],"lake.":[150],"demonstrate":[152],"how":[153],"discover":[156],"with":[158,166],"application,":[160],"present":[162],"two":[163],"cases,":[165],"real":[167],"including":[169],"similarity":[171],"detection":[172],"guidance.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
