{"id":"https://openalex.org/W4220709071","doi":"https://doi.org/10.1162/dint_a_00141","title":"Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes","display_name":"Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4220709071","doi":"https://doi.org/10.1162/dint_a_00141"},"language":"en","primary_location":{"id":"doi:10.1162/dint_a_00141","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00141","pdf_url":"https://direct.mit.edu/dint/article-pdf/4/2/426/2012419/dint_a_00141.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/dint/article-pdf/4/2/426/2012419/dint_a_00141.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068872311","display_name":"Hendrik Nolte","orcid":"https://orcid.org/0000-0003-2138-8510"},"institutions":[{"id":"https://openalex.org/I4210091733","display_name":"Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen","ror":"https://ror.org/00cd95c65","country_code":"DE","type":"other","lineage":["https://openalex.org/I4210091733","https://openalex.org/I74656192"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Hendrik Nolte","raw_affiliation_strings":["Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen G\u00f6ttingen, Gottingen 37077, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen G\u00f6ttingen, Gottingen 37077, Germany","institution_ids":["https://openalex.org/I4210091733"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063771413","display_name":"Philipp Wieder","orcid":"https://orcid.org/0000-0002-6992-1866"},"institutions":[{"id":"https://openalex.org/I4210091733","display_name":"Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen","ror":"https://ror.org/00cd95c65","country_code":"DE","type":"other","lineage":["https://openalex.org/I4210091733","https://openalex.org/I74656192"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Wieder","raw_affiliation_strings":["Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen G\u00f6ttingen, Gottingen 37077, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Gesellschaft f\u00fcr wissenschaftliche Datenverarbeitung mbH G\u00f6ttingen G\u00f6ttingen, Gottingen 37077, Germany","institution_ids":["https://openalex.org/I4210091733"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068872311"],"corresponding_institution_ids":["https://openalex.org/I4210091733"],"apc_list":null,"apc_paid":null,"fwci":1.3982,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.89296871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"4","issue":"2","first_page":"426","last_page":"438"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7665270566940308},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7312021851539612},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7275258898735046},{"id":"https://openalex.org/keywords/reusability","display_name":"Reusability","score":0.4910888671875},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4805096387863159},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.44727593660354614},{"id":"https://openalex.org/keywords/data-mapping","display_name":"Data mapping","score":0.44668492674827576},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.4365570545196533},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.42880091071128845},{"id":"https://openalex.org/keywords/data-type","display_name":"Data type","score":0.41548100113868713},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.3868274688720703},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3477175235748291},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2648141384124756},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.23185855150222778},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.155506432056427},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.11199697852134705},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1044577956199646}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7665270566940308},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7312021851539612},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7275258898735046},{"id":"https://openalex.org/C137981799","wikidata":"https://www.wikidata.org/wiki/Q1369184","display_name":"Reusability","level":3,"score":0.4910888671875},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4805096387863159},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.44727593660354614},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.44668492674827576},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.4365570545196533},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.42880091071128845},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.41548100113868713},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3868274688720703},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3477175235748291},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2648141384124756},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.23185855150222778},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.155506432056427},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.11199697852134705},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1044577956199646},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/dint_a_00141","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00141","pdf_url":"https://direct.mit.edu/dint/article-pdf/4/2/426/2012419/dint_a_00141.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:dcf6871ecb574ed3a426faa5e56e1372","is_oa":false,"landing_page_url":"https://doaj.org/article/dcf6871ecb574ed3a426faa5e56e1372","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data Intelligence, Vol 4, Iss 2 (2022)","raw_type":"article"},{"id":"pmh:oai:publications.goettingen-research-online.de:2/129373","is_oa":true,"landing_page_url":"https://publications.goettingen-research-online.de/handle/2/121151","pdf_url":null,"source":{"id":"https://openalex.org/S4306401634","display_name":"GoeScholar  The Publication Server of the Georg-August-Universit\u00e4t G\u00f6ttingen (Georg-August-Universit\u00e4t G\u00f6ttingen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210122495","host_organization_name":"Asklepios Klinik St. Georg","host_organization_lineage":["https://openalex.org/I4210122495"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"yes"}],"best_oa_location":{"id":"doi:10.1162/dint_a_00141","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00141","pdf_url":"https://direct.mit.edu/dint/article-pdf/4/2/426/2012419/dint_a_00141.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2251157814","https://openalex.org/W2539403003","https://openalex.org/W2562004858","https://openalex.org/W2575168421","https://openalex.org/W2594526919","https://openalex.org/W2785444232","https://openalex.org/W2883070524","https://openalex.org/W2966917845","https://openalex.org/W2968441198","https://openalex.org/W3013553122","https://openalex.org/W3037921969","https://openalex.org/W3094550016","https://openalex.org/W3107926801","https://openalex.org/W4238202281","https://openalex.org/W4255463977","https://openalex.org/W6753286780"],"related_works":["https://openalex.org/W4386794293","https://openalex.org/W3206611000","https://openalex.org/W4383737612","https://openalex.org/W2189057767","https://openalex.org/W4294078452","https://openalex.org/W4382405569","https://openalex.org/W4385609697","https://openalex.org/W4382405570","https://openalex.org/W2558717786","https://openalex.org/W3122748829"],"abstract_inverted_index":{"Abstract":[0],"Since":[1],"their":[2],"introduction":[3],"by":[4,17],"James":[5],"Dixon":[6],"in":[7,41,84,135],"2010,":[8],"data":[9,26,62,82,112,142,146],"lakes":[10,83],"get":[11],"more":[12,14],"and":[13,77,161,166],"attention,":[15],"driven":[16],"the":[18,24,29,45,49,65,72,108,121,158],"promise":[19],"of":[20,23,48,81,123,149],"high":[21],"reusability":[22],"stored":[25],"due":[27],"to":[28,43,86,115,139,156],"schema-on-read":[30],"semantics.":[31],"Building":[32],"on":[33,120,145,171],"this":[34,104],"idea,":[35],"several":[36],"additional":[37],"requirements":[38,89],"were":[39],"discussed":[40],"literature":[42],"improve":[44],"general":[46],"usability":[47],"concept,":[50],"like":[51],"a":[52,75,78,116,136,141,172],"central":[53],"metadata":[54],"catalog":[55],"including":[56],"all":[57],"provenance":[58],"information,":[59],"an":[60],"overarching":[61],"governance,":[63],"or":[64],"integration":[66],"with":[67],"(high-performance)":[68],"processing":[69,168],"capabilities.":[70],"Although":[71],"necessity":[73],"for":[74],"logical":[76],"physical":[79,159],"organisation":[80,106],"order":[85],"meet":[87],"those":[88],"is":[90,107,113],"widely":[91],"recognized,":[92],"no":[93],"concrete":[94],"guidelines":[95],"are":[96],"yet":[97],"provided.":[98],"The":[99],"most":[100],"common":[101],"architecture":[102],"implementing":[103],"conceptual":[105],"zone":[109,118],"architecture,":[110],"where":[111],"assigned":[114],"certain":[117],"depending":[119],"degree":[122],"processing.":[124],"This":[125],"paper":[126],"discusses":[127],"how":[128,151,162],"FAIR":[129],"Digital":[130],"Objects":[131],"can":[132,153],"be":[133,154],"used":[134,155],"novel":[137],"approach":[138],"organize":[140],"lake":[143],"based":[144,170],"types":[147],"instead":[148],"zones,":[150],"they":[152,163],"abstract":[157],"implementation,":[160],"empower":[164],"generic":[165],"portable":[167],"capabilities":[169],"provenance-based":[173],"approach.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-27T09:02:27.158192","created_date":"2025-10-10T00:00:00"}
