{"id":"https://openalex.org/W4406458842","doi":"https://doi.org/10.1109/bigdata62323.2024.10826067","title":"Crystalia: Flexible and Efficient Method for Large Dataset Lineage Tracking","display_name":"Crystalia: Flexible and Efficient Method for Large Dataset Lineage Tracking","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458842","doi":"https://doi.org/10.1109/bigdata62323.2024.10826067"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10826067","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101828853","display_name":"Vladimir Korolev","orcid":"https://orcid.org/0000-0002-0211-8104"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Vladimir Korolev","raw_affiliation_strings":["UMBC,CSEE Department,Baltimore,MD"],"affiliations":[{"raw_affiliation_string":"UMBC,CSEE Department,Baltimore,MD","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020975010","display_name":"Anupam Joshi","orcid":"https://orcid.org/0000-0002-8641-3193"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anupam Joshi","raw_affiliation_strings":["UMBC,CSEE Department,Baltimore,MD"],"affiliations":[{"raw_affiliation_string":"UMBC,CSEE Department,Baltimore,MD","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101828853"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0778,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88693528,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3431","last_page":"3440"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7468166947364807},{"id":"https://openalex.org/keywords/lineage","display_name":"Lineage (genetic)","score":0.6212934255599976},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.6122068762779236},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35703209042549133},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.0897027850151062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7468166947364807},{"id":"https://openalex.org/C2776817793","wikidata":"https://www.wikidata.org/wiki/Q6553369","display_name":"Lineage (genetic)","level":3,"score":0.6212934255599976},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.6122068762779236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35703209042549133},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0897027850151062},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10826067","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W125869453","https://openalex.org/W143079841","https://openalex.org/W269919247","https://openalex.org/W1501507312","https://openalex.org/W1600255172","https://openalex.org/W1906647448","https://openalex.org/W1976306141","https://openalex.org/W2029021214","https://openalex.org/W2132711844","https://openalex.org/W2144981148","https://openalex.org/W2166420405","https://openalex.org/W2245551072","https://openalex.org/W2266206262","https://openalex.org/W2302501749","https://openalex.org/W3003265726","https://openalex.org/W3161643086","https://openalex.org/W3179292387","https://openalex.org/W3215259255","https://openalex.org/W4206657655","https://openalex.org/W4231078754","https://openalex.org/W4231488651","https://openalex.org/W4400118483","https://openalex.org/W4401132767","https://openalex.org/W6679054427","https://openalex.org/W6794945760","https://openalex.org/W6804656293"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2506149209","https://openalex.org/W4396701345","https://openalex.org/W2352153050","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Computation":[0],"over":[1],"large":[2,123],"data":[3,28],"sets":[4],"is":[5,29,76],"now":[6],"a":[7,30,86,145,177],"critical":[8],"part":[9],"of":[10,26,85,106,167],"most":[11],"natural":[12],"and":[13,24,54,99,148,170,186],"social":[14],"sciences.":[15],"As":[16],"such,":[17],"having":[18],"ways":[19],"to":[20,44,129,133],"capture":[21],"the":[22,27,83,96],"provenance":[23,57,190],"lineage":[25],"sine":[31],"qua":[32],"non":[33],"for":[34,40,58,65,79,151],"not":[35],"just":[36],"reproducibility,":[37],"but":[38,61],"also":[39],"these":[41,111,122],"computational":[42,59,134],"approaches":[43],"inform":[45],"public":[46],"policy.":[47],"Standards":[48],"like":[49,95],"PROV-O,":[50],"RO-Crate,":[51],"IEEE":[52],"2791,":[53],"ISO-23494":[55],"define":[56],"processes":[60],"lack":[62],"detailed":[63],"specifications":[64],"describing":[66],"entities":[67],"beyond":[68],"simple":[69],"URIs":[70],"or":[71,88],"linear":[72,113],"hash":[73,114],"sums.":[74],"This":[75],"usually":[77],"sufficient":[78],"smaller":[80],"datasets":[81,92,124],"with":[82,188],"size":[84],"gigabyte":[87],"two.":[89],"However,":[90],"contempory":[91],"in":[93,108,176],"domains":[94],"life":[97],"sciences":[98],"high":[100],"energy":[101],"physics":[102],"often":[103],"exceed":[104],"tens":[105],"terabytes":[107],"size.":[109],"For":[110],"datasets,":[112],"sums":[115],"are":[116,125],"no":[117],"longer":[118],"feasible.":[119],"In":[120],"addition":[121],"frequently":[126],"copied":[127],"around":[128],"bring":[130],"them":[131],"closer":[132],"resources,":[135],"limiting":[136],"URL":[137],"based":[138],"tracking.In":[139],"this":[140],"article,":[141],"we":[142],"introduce":[143],"Crystalia,":[144],"novel,":[146],"flexible,":[147],"parallelizable":[149],"method":[150],"computing":[152],"dataset":[153,168],"descriptions":[154],"using":[155],"Merkle":[156],"trees,":[157],"represented":[158],"as":[159],"Knowledge":[160,181],"Graphs.":[161],"Our":[162],"solution":[163],"enables":[164],"efficient":[165],"tracking":[166],"versions":[169],"transformations":[171],"by":[172],"capturing":[173],"reproducibility":[174],"information":[175],"scalable":[178],"manner.":[179],"Leveraging":[180],"Graphs":[182],"facilitates":[183],"annotation,":[184],"exchange,":[185],"integration":[187],"existing":[189],"standards.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
