{"id":"https://openalex.org/W3176771520","doi":"https://doi.org/10.1145/3448016.3452788","title":"LIMA: Fine-grained Lineage Tracing and Reuse in Machine Learning Systems","display_name":"LIMA: Fine-grained Lineage Tracing and Reuse in Machine Learning Systems","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3176771520","doi":"https://doi.org/10.1145/3448016.3452788","mag":"3176771520"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3452788","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452788","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001426766","display_name":"Arnab Phani","orcid":"https://orcid.org/0009-0001-2935-0608"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Arnab Phani","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012610732","display_name":"Benjamin Rath","orcid":"https://orcid.org/0000-0002-2516-2394"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Benjamin Rath","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053153176","display_name":"Matthias B\u00f6ehm","orcid":"https://orcid.org/0000-0003-1344-3663"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Matthias Boehm","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001426766"],"corresponding_institution_ids":["https://openalex.org/I4092182"],"apc_list":null,"apc_paid":null,"fwci":7.0338,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.96661633,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1426","last_page":"1439"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.78200364112854},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6920464038848877},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.6536740064620972},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6528814435005188},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5679192543029785},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5456429719924927},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3566403090953827},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.25493913888931274},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.17739924788475037},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15147733688354492}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78200364112854},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6920464038848877},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.6536740064620972},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6528814435005188},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5679192543029785},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5456429719924927},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3566403090953827},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.25493913888931274},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.17739924788475037},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15147733688354492},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3448016.3452788","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452788","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":104,"referenced_works":["https://openalex.org/W22495568","https://openalex.org/W81045675","https://openalex.org/W89955767","https://openalex.org/W135267584","https://openalex.org/W1485275407","https://openalex.org/W1513618424","https://openalex.org/W1552694902","https://openalex.org/W1575999478","https://openalex.org/W1860107648","https://openalex.org/W1934084512","https://openalex.org/W1973761208","https://openalex.org/W1983178058","https://openalex.org/W1983833794","https://openalex.org/W1987163291","https://openalex.org/W1992977903","https://openalex.org/W1998968952","https://openalex.org/W2030433745","https://openalex.org/W2064366207","https://openalex.org/W2081593332","https://openalex.org/W2098416578","https://openalex.org/W2098935637","https://openalex.org/W2099102906","https://openalex.org/W2099636580","https://openalex.org/W2101234009","https://openalex.org/W2106105896","https://openalex.org/W2116832440","https://openalex.org/W2118038484","https://openalex.org/W2120575449","https://openalex.org/W2122465391","https://openalex.org/W2125332694","https://openalex.org/W2130204178","https://openalex.org/W2131400476","https://openalex.org/W2131975293","https://openalex.org/W2133986470","https://openalex.org/W2145154883","https://openalex.org/W2146292423","https://openalex.org/W2149445791","https://openalex.org/W2150606131","https://openalex.org/W2167541073","https://openalex.org/W2185864411","https://openalex.org/W2240938131","https://openalex.org/W2266823300","https://openalex.org/W2293299776","https://openalex.org/W2294556882","https://openalex.org/W2302501749","https://openalex.org/W2395323716","https://openalex.org/W2402144811","https://openalex.org/W2547190417","https://openalex.org/W2547386789","https://openalex.org/W2585098096","https://openalex.org/W2611130659","https://openalex.org/W2613577915","https://openalex.org/W2616121800","https://openalex.org/W2616441648","https://openalex.org/W2743948853","https://openalex.org/W2744949745","https://openalex.org/W2752236330","https://openalex.org/W2764754194","https://openalex.org/W2784722566","https://openalex.org/W2791094827","https://openalex.org/W2798535736","https://openalex.org/W2798659310","https://openalex.org/W2799237774","https://openalex.org/W2804032941","https://openalex.org/W2804269561","https://openalex.org/W2807799957","https://openalex.org/W2904201207","https://openalex.org/W2927176210","https://openalex.org/W2943537813","https://openalex.org/W2943950766","https://openalex.org/W2948252829","https://openalex.org/W2948742859","https://openalex.org/W2950063476","https://openalex.org/W2955344300","https://openalex.org/W2963065629","https://openalex.org/W2964054286","https://openalex.org/W2964108773","https://openalex.org/W2964303709","https://openalex.org/W2970059736","https://openalex.org/W2970971581","https://openalex.org/W2971290973","https://openalex.org/W2971428651","https://openalex.org/W2981758446","https://openalex.org/W3005700362","https://openalex.org/W3013335600","https://openalex.org/W3013629321","https://openalex.org/W3013980729","https://openalex.org/W3035487250","https://openalex.org/W3036963153","https://openalex.org/W3081749234","https://openalex.org/W3082570516","https://openalex.org/W3083309956","https://openalex.org/W3084654275","https://openalex.org/W3091298939","https://openalex.org/W3098547670","https://openalex.org/W3104259571","https://openalex.org/W3141903600","https://openalex.org/W3148573243","https://openalex.org/W3148888152","https://openalex.org/W4210354100","https://openalex.org/W4211133859","https://openalex.org/W4246215794","https://openalex.org/W4365786623","https://openalex.org/W6757085416"],"related_works":["https://openalex.org/W1981780420","https://openalex.org/W2182707996","https://openalex.org/W45233828","https://openalex.org/W2964988449","https://openalex.org/W188202134","https://openalex.org/W2397952901","https://openalex.org/W2888673113","https://openalex.org/W2029380707","https://openalex.org/W4255934811","https://openalex.org/W2465382974"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"and":[3,18,29,38,58,77,84,103,116,130,139,143,151,158,173,175],"data":[4,23],"science":[5],"workflows":[6],"are":[7],"inherently":[8],"exploratory.":[9],"Data":[10],"scientists":[11],"pose":[12],"hypotheses,":[13],"integrate":[14,152],"the":[15,120,148],"necessary":[16],"data,":[17],"run":[19],"ML":[20,61,68,106,188],"pipelines":[21,189],"of":[22,35,110,123,187],"cleaning,":[24],"feature":[25],"engineering,":[26],"model":[27],"selection":[28],"hyper-parameter":[30],"tuning.":[31],"The":[32,161],"repetitive":[33],"nature":[34],"these":[36],"workflows,":[37],"their":[39],"hierarchical":[40],"composition":[41],"from":[42],"building":[43],"blocks":[44],"exhibits":[45],"high":[46],"computational":[47],"redundancy.":[48,181],"Existing":[49],"work":[50],"addresses":[51],"this":[52,90,153],"redundancy":[53,83],"with":[54,126,155,168],"coarse-grained":[55],"lineage":[56,101,124,128,135,166],"tracing":[57,102,109,129,167],"reuse":[59,104,145],"for":[60,98,137],"pipelines.":[62],"This":[63],"approach":[64],"allows":[65],"using":[66],"existing":[67],"systems,":[69],"but":[70],"views":[71],"entire":[72],"algorithms":[73],"as":[74,132,134],"black":[75],"boxes,":[76],"thus,":[78],"fails":[79],"to":[80,85,178,194],"eliminate":[81,179],"fine-grained":[82,100,165,180],"handle":[86],"internal":[87],"non-determinism.":[88],"In":[89],"paper,":[91],"we":[92],"introduce":[93],"LIMA,":[94],"a":[95,185],"practical":[96],"framework":[97,154,163],"efficient,":[99],"inside":[105],"systems.":[107],"Lineage":[108],"individual":[111],"operations":[112],"creates":[113],"new":[114],"challenges":[115],"opportunities.":[117],"We":[118],"address":[119],"large":[121],"size":[122],"traces":[125],"multi-level":[127],"reuse,":[131],"well":[133],"deduplication":[136],"loops":[138],"functions;":[140],"exploit":[141],"full":[142],"partial":[144],"opportunities":[146],"across":[147],"program":[149],"hierarchy;":[150],"task":[156],"parallelism":[157],"operator":[159],"fusion.":[160],"resulting":[162],"performs":[164],"low":[169],"overhead,":[170],"provides":[171],"versioning":[172],"reproducibility,":[174],"is":[176],"able":[177],"Our":[182],"experiments":[183],"on":[184],"variety":[186],"show":[190],"performance":[191],"improvements":[192],"up":[193],"12.4x.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
