{"id":"https://openalex.org/W4386768917","doi":"https://doi.org/10.14778/3611540.3611555","title":"OneProvenance: Efficient Extraction of Dynamic Coarse-Grained Provenance from Database Query Event Logs","display_name":"OneProvenance: Efficient Extraction of Dynamic Coarse-Grained Provenance from Database Query Event Logs","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4386768917","doi":"https://doi.org/10.14778/3611540.3611555"},"language":"en","primary_location":{"id":"doi:10.14778/3611540.3611555","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3611540.3611555","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059914414","display_name":"Fotis Psallidas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fotis Psallidas","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061314881","display_name":"Ashvin Agrawal","orcid":"https://orcid.org/0009-0004-7862-0995"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ashvin Agrawal","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045358485","display_name":"Chandru Sugunan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chandru Sugunan","raw_affiliation_strings":["Snowflake"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Snowflake","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062016419","display_name":"Khaled Z. Ibrahim","orcid":"https://orcid.org/0009-0004-5362-3612"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Khaled Ibrahim","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053293674","display_name":"Konstantinos Karanasos","orcid":"https://orcid.org/0009-0007-6975-2568"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konstantinos Karanasos","raw_affiliation_strings":["Meta"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058019805","display_name":"Jes\u00fas Camacho-Rodr\u00edguez","orcid":"https://orcid.org/0009-0008-9151-6024"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jes\u00fas Camacho-Rodr\u00edguez","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020678151","display_name":"Avrilia Floratou","orcid":"https://orcid.org/0009-0007-5760-8657"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Avrilia Floratou","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010481434","display_name":"Carlo Curino","orcid":"https://orcid.org/0000-0003-3712-7358"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Carlo Curino","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051301731","display_name":"Raghu Ramakrishnan","orcid":"https://orcid.org/0009-0007-5086-7664"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Raghu Ramakrishnan","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5535,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.89782411,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"16","issue":"12","first_page":"3662","last_page":"3675"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8442668914794922},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6421416997909546},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5847330093383789},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5240262150764465},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5191701650619507},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5113269686698914},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.48760786652565},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.47762531042099},{"id":"https://openalex.org/keywords/provenance","display_name":"Provenance","score":0.46730801463127136},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4149796664714813},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.11319497227668762},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08427417278289795}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8442668914794922},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6421416997909546},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5847330093383789},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5240262150764465},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5191701650619507},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5113269686698914},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.48760786652565},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.47762531042099},{"id":"https://openalex.org/C2780049196","wikidata":"https://www.wikidata.org/wiki/Q23582628","display_name":"Provenance","level":2,"score":0.46730801463127136},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4149796664714813},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11319497227668762},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08427417278289795},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C5900021","wikidata":"https://www.wikidata.org/wiki/Q163082","display_name":"Petrology","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3611540.3611555","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3611540.3611555","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W297231882","https://openalex.org/W760598031","https://openalex.org/W1988497694","https://openalex.org/W2004281168","https://openalex.org/W2063103859","https://openalex.org/W2069028640","https://openalex.org/W2092833514","https://openalex.org/W2102729564","https://openalex.org/W2138199375","https://openalex.org/W2140116426","https://openalex.org/W2145154883","https://openalex.org/W2152812436","https://openalex.org/W2162520370","https://openalex.org/W2171262729","https://openalex.org/W2425316268","https://openalex.org/W2438792749","https://openalex.org/W2612758631","https://openalex.org/W2766945679","https://openalex.org/W2795089200","https://openalex.org/W2798664493","https://openalex.org/W2805350385","https://openalex.org/W2805350738","https://openalex.org/W2810954846","https://openalex.org/W2952851516","https://openalex.org/W2963174348","https://openalex.org/W2970992672","https://openalex.org/W3086791196","https://openalex.org/W3164278511","https://openalex.org/W3180107114","https://openalex.org/W4255767193","https://openalex.org/W4286432951","https://openalex.org/W4302802341","https://openalex.org/W6753529518"],"related_works":["https://openalex.org/W2354627941","https://openalex.org/W2347483153","https://openalex.org/W2353379336","https://openalex.org/W2379683085","https://openalex.org/W2363868702","https://openalex.org/W2374448931","https://openalex.org/W2376723740","https://openalex.org/W2370535391","https://openalex.org/W2370679613","https://openalex.org/W2392768766"],"abstract_inverted_index":{"Provenance":[0,60],"encodes":[1],"information":[2],"that":[3,102,158,174],"connects":[4],"datasets,":[5],"their":[6],"generation":[7],"workflows,":[8],"and":[9,34,54,94,98,109,163,193,206],"associated":[10],"metadata":[11],"(e.g.,":[12,32],"who":[13],"or":[14],"when":[15],"executed":[16],"a":[17,25,47],"query).":[18],"As":[19],"such,":[20],"it":[21],"is":[22,46,103,110,199],"instrumental":[23],"for":[24,81,114,160],"wide":[26],"range":[27],"of":[28,40,57,138],"critical":[29],"governance":[30],"applications":[31],"observability":[33],"auditing).":[35],"Unfortunately,":[36],"in":[37,73,77],"the":[38,52,92,135,190],"context":[39],"database":[41,58,93],"systems,":[42],"extracting":[43,152],"coarse-grained":[44],"provenance":[45,79,82,95,101,126,153,210],"long-standing":[48],"problem":[49],"due":[50],"to":[51,91,181,184],"complexity":[53],"sheer":[55],"volume":[56],"workflows.":[59],"extraction":[61,96,127,140,178,191,211],"from":[62,129],"query":[63,106,130,144,161],"event":[64,131,156],"logs":[65],"has":[66],"been":[67],"recently":[68],"proposed":[69],"as":[70],"favorable":[71],"because,":[72],"principle,":[74],"can":[75,176],"result":[76],"meaningful":[78],"graphs":[80],"applications.":[83,116],"Current":[84],"approaches,":[85],"however,":[86],"(a)":[87,142],"add":[88],"substantial":[89],"overhead":[90],"workflows":[97],"(b)":[99,151],"extract":[100],"noisy,":[104],"omits":[105],"execution":[107,145],"dependencies,":[108,162],"not":[111],"rich":[112],"enough":[113],"upstream":[115],"To":[117],"address":[118],"these":[119],"problems,":[120],"we":[121],"introduce":[122],"OneProvenance:":[123],"an":[124],"efficient":[125,148],"system":[128],"logs.":[132],"OneProvenance":[133,175,198],"addresses":[134],"unique":[136],"challenges":[137],"log-based":[139],"by":[141,179,203],"identifying":[143],"dependencies":[146],"through":[147,154],"log":[149],"analysis,":[150],"novel":[155],"transformations":[157],"account":[159],"(c)":[164],"introducing":[165],"effective":[166],"filtering":[167],"optimizations.":[168],"Our":[169],"thorough":[170],"experimental":[171],"analysis":[172],"shows":[173],"improve":[177],"up":[180],"~18X":[182],"compared":[183],"state-of-the-art":[185],"baselines;":[186],"our":[187],"optimizations":[188],"reduce":[189],"noise":[192],"optimize":[194],"performance":[195],"even":[196],"further.":[197],"deployed":[200],"at":[201],"scale":[202],"Microsoft":[204],"Purview":[205],"actively":[207],"supports":[208],"customer":[209],"needs":[212],"(https://bit.ly/3N2JVGF).":[213]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
