{"id":"https://openalex.org/W4402673593","doi":"https://doi.org/10.1109/e-science62913.2024.10678667","title":"Data Drift for Automatic FAIR-compliant Dataset Versioning in Large Repositories","display_name":"Data Drift for Automatic FAIR-compliant Dataset Versioning in Large Repositories","publication_year":2024,"publication_date":"2024-09-16","ids":{"openalex":"https://openalex.org/W4402673593","doi":"https://doi.org/10.1109/e-science62913.2024.10678667"},"language":"en","primary_location":{"id":"doi:10.1109/e-science62913.2024.10678667","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/e-science62913.2024.10678667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://norma.ncirl.ie/7065/1/GonzalezCebrian-etal-eScience24.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090919422","display_name":"Alba Gonz\u00e1lez\u2013Cebri\u00e1n","orcid":"https://orcid.org/0000-0002-7519-4917"},"institutions":[{"id":"https://openalex.org/I104546213","display_name":"National College of Ireland","ror":"https://ror.org/02qzs9336","country_code":"IE","type":"education","lineage":["https://openalex.org/I104546213"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Alba Gonz\u00e1lez\u2013Cebri\u00e1n","raw_affiliation_strings":["National College of Ireland,Cloud Competency Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"National College of Ireland,Cloud Competency Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I104546213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107436297","display_name":"Iulian Ciolacu","orcid":null},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Iulian Ciolacu","raw_affiliation_strings":["University POLITEHNICA of Bucharest,Faculty of Automatic Control and Computers,Bucharest,Romania"],"affiliations":[{"raw_affiliation_string":"University POLITEHNICA of Bucharest,Faculty of Automatic Control and Computers,Bucharest,Romania","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016377628","display_name":"Michael J. Bradford","orcid":"https://orcid.org/0000-0002-4810-0495"},"institutions":[{"id":"https://openalex.org/I104546213","display_name":"National College of Ireland","ror":"https://ror.org/02qzs9336","country_code":"IE","type":"education","lineage":["https://openalex.org/I104546213"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Michael Bradford","raw_affiliation_strings":["National College of Ireland,Cloud Competency Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"National College of Ireland,Cloud Competency Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I104546213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024776367","display_name":"Ciprian Dobre","orcid":"https://orcid.org/0000-0003-4638-7725"},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Ciprian Dobre","raw_affiliation_strings":["University POLITEHNICA of Bucharest,Faculty of Automatic Control and Computers,Bucharest,Romania"],"affiliations":[{"raw_affiliation_string":"University POLITEHNICA of Bucharest,Faculty of Automatic Control and Computers,Bucharest,Romania","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088758379","display_name":"Horacio Gonz\u00e1lez\u2013V\u00e9lez","orcid":"https://orcid.org/0000-0003-0241-6053"},"institutions":[{"id":"https://openalex.org/I104546213","display_name":"National College of Ireland","ror":"https://ror.org/02qzs9336","country_code":"IE","type":"education","lineage":["https://openalex.org/I104546213"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Horacio Gonz\u00e1lez\u2013V\u00e9lez","raw_affiliation_strings":["National College of Ireland,Cloud Competency Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"National College of Ireland,Cloud Competency Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I104546213"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090919422"],"corresponding_institution_ids":["https://openalex.org/I104546213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24803465,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7220124006271362},{"id":"https://openalex.org/keywords/software-versioning","display_name":"Software versioning","score":0.6829372644424438},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33718544244766235},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18406137824058533},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.17494219541549683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7220124006271362},{"id":"https://openalex.org/C198140048","wikidata":"https://www.wikidata.org/wiki/Q10859422","display_name":"Software versioning","level":3,"score":0.6829372644424438},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33718544244766235},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18406137824058533},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.17494219541549683}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/e-science62913.2024.10678667","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/e-science62913.2024.10678667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},{"id":"pmh:oai:norma.ncirl.ie:7065","is_oa":true,"landing_page_url":null,"pdf_url":"https://norma.ncirl.ie/7065/1/GonzalezCebrian-etal-eScience24.pdf","source":{"id":"https://openalex.org/S7407055118","display_name":"NORMA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"pmh:oai:norma.ncirl.ie:7065","is_oa":true,"landing_page_url":null,"pdf_url":"https://norma.ncirl.ie/7065/1/GonzalezCebrian-etal-eScience24.pdf","source":{"id":"https://openalex.org/S7407055118","display_name":"NORMA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5750848692","display_name":null,"funder_award_id":"PN-III-P3-3.5-EUK-2019-0241","funder_id":"https://openalex.org/F4320323983","funder_display_name":"Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"},{"id":"https://openalex.org/G8141067265","display_name":null,"funder_award_id":"PNCDI III","funder_id":"https://openalex.org/F4320323983","funder_display_name":"Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"}],"funders":[{"id":"https://openalex.org/F4320320834","display_name":"Enterprise Ireland","ror":"https://ror.org/023z51242"},{"id":"https://openalex.org/F4320323983","display_name":"Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii","ror":"https://ror.org/01q7jq182"},{"id":"https://openalex.org/F4320338388","display_name":"Eurostars","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4402673593.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W297231882","https://openalex.org/W1585854823","https://openalex.org/W1965555277","https://openalex.org/W2028138594","https://openalex.org/W2054610549","https://openalex.org/W2089468765","https://openalex.org/W2099419573","https://openalex.org/W2122538988","https://openalex.org/W2146950091","https://openalex.org/W2263593861","https://openalex.org/W2302501749","https://openalex.org/W2807601743","https://openalex.org/W3137190587","https://openalex.org/W4300773606","https://openalex.org/W4312380846","https://openalex.org/W4318147592","https://openalex.org/W4394609417","https://openalex.org/W4394779357","https://openalex.org/W6679182178","https://openalex.org/W6755672337"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W1503327463","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Construed":[0],"as":[1,35],"a":[2],"shift":[3],"in":[4,139,199],"the":[5,18,50,64,76,119,170,187,200,203],"distribution":[6],"or":[7],"structure":[8],"of":[9,20,54,66,121,189,202],"data":[10,13,31,84,125,193,207],"over":[11],"time,":[12],"drift":[14,32,126,146,208],"can":[15,215],"adversely":[16],"affect":[17],"performance":[19,140],"machine":[21],"learning":[22],"models":[23],"and":[24,39,59,92,98,103,110,128,162,213],"data-driven":[25],"decisions.":[26],"This":[27],"study":[28],"examines":[29],"two":[30,171],"metrics,":[33,68,172],"denoted":[34],"d<inf":[36,40],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[37,41],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">E,PCA</inf>":[38],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">E,AE</inf>,":[42],"that":[43,134,174],"are":[44,96],"derived":[45,210],"from":[46,75,211],"unsupervised":[47,104],"ML":[48],"models:":[49],"reconstruction":[51],"error-based":[52],"metrics":[53,136,179,191,209],"Principal":[55],"Component":[56],"Analysis":[57],"(PCA)":[58],"Autoencoders":[60],"(AE).":[61],"To":[62],"investigate":[63],"robustness":[65],"these":[67,190],"we":[69],"have":[70,82],"systematically":[71],"accessed":[72],"time-series":[73],"datasets":[74,156],"European":[77],"Data":[78],"Portal.":[79],"Our":[80,131],"experiments":[81],"examined":[83],"versioning":[85],"through":[86],"three":[87],"basic":[88],"events:":[89],"creation,":[90],"update,":[91],"deletion.":[93],"The":[94],"results":[95,132],"summarised":[97],"aggregated":[99],"for":[100,159],"all":[101],"datasets,":[102],"analysis":[105],"based":[106],"on":[107,124],"Robust":[108],"PCA":[109,161,204,212],"AE":[111,163,214],"has":[112],"been":[113],"performed":[114],"to":[115,220],"examine":[116],"patterns":[117],"within":[118],"impact":[120],"dataset":[122,222],"characteristics":[123],"detection":[127,147],"computational":[129],"efficiency.":[130],"indicate":[133],"both":[135,160],"aligned":[137],"closely":[138],"with":[141,151],"new":[142],"records,":[143],"suggesting":[144,173],"consistent":[145],"under":[148],"normal":[149],"conditions":[150],"FAIR":[152],"compliance.":[153],"However,":[154],"high-dimensional":[155],"posed":[157],"challenges":[158],"models.":[164],"Update":[165],"events":[166,185],"revealed":[167,197],"discrepancies":[168],"between":[169],"non-linear":[175],"shifts":[176],"affected":[177],"AE-based":[178],"more":[180],"than":[181],"PCA-based":[182],"ones.":[183],"Deletion":[184],"demonstrated":[186],"resilience":[188],"against":[192],"loss,":[194],"but":[195,218],"also":[196],"variability":[198],"reliability":[201],"model;":[205],"i.e.,":[206],"be":[216],"effective":[217],"sensitive":[219],"certain":[221],"characteristics.":[223]},"counts_by_year":[],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-10-10T00:00:00"}
