{"id":"https://openalex.org/W2585642343","doi":"https://doi.org/10.1109/bigdata.2016.7840748","title":"LogProv: Logging events as provenance of big data analytics pipelines with trustworthiness","display_name":"LogProv: Logging events as provenance of big data analytics pipelines with trustworthiness","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2585642343","doi":"https://doi.org/10.1109/bigdata.2016.7840748","mag":"2585642343"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7840748","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840748","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100653711","display_name":"Ruoyu Wang","orcid":"https://orcid.org/0000-0002-7240-558X"},"institutions":[{"id":"https://openalex.org/I4210101388","display_name":"Health Sciences and Nutrition","ror":"https://ror.org/0152bt112","country_code":"AU","type":"facility","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4210101388","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU","CN"],"is_corresponding":true,"raw_author_name":"Ruoyu Wang","raw_affiliation_strings":["Data61, Commonwealth Scientific and Industrial Research Ogranisation, Australia","Shanghai Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Data61, Commonwealth Scientific and Industrial Research Ogranisation, Australia","institution_ids":["https://openalex.org/I4210101388","https://openalex.org/I42894916"]},{"raw_affiliation_string":"Shanghai Jiaotong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008497202","display_name":"Daniel Sun","orcid":"https://orcid.org/0000-0003-2342-7421"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Daniel Sun","raw_affiliation_strings":["The Universitv of New South Wales, Australia"],"affiliations":[{"raw_affiliation_string":"The Universitv of New South Wales, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354142","display_name":"Guoqiang Li","orcid":"https://orcid.org/0000-0001-9005-7112"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqiang Li","raw_affiliation_strings":["Shanghai Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059402413","display_name":"Muhammad Atif","orcid":"https://orcid.org/0000-0002-4139-8292"},"institutions":[{"id":"https://openalex.org/I4210141271","display_name":"National Computational Infrastructure","ror":"https://ror.org/04yx6dh41","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210141271"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Muhammad Atif","raw_affiliation_strings":["National Computational Infrastructure, Australia"],"affiliations":[{"raw_affiliation_string":"National Computational Infrastructure, Australia","institution_ids":["https://openalex.org/I4210141271"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082256444","display_name":"\u202aSurya Nepal\u202c","orcid":"https://orcid.org/0000-0002-3289-6599"},"institutions":[{"id":"https://openalex.org/I4210101388","display_name":"Health Sciences and Nutrition","ror":"https://ror.org/0152bt112","country_code":"AU","type":"facility","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4210101388","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Surya Nepal","raw_affiliation_strings":["Data61, Commonwealth Scientific and Industrial Research Ogranisation, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, Commonwealth Scientific and Industrial Research Ogranisation, Australia","institution_ids":["https://openalex.org/I4210101388","https://openalex.org/I42894916"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100653711"],"corresponding_institution_ids":["https://openalex.org/I183067930","https://openalex.org/I4210101388","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":8.487,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.97279329,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"103","issue":null,"first_page":"1402","last_page":"1411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.823104739189148},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7700570821762085},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6419687867164612},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.49777892231941223},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.49223557114601135},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4851866364479065},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.47920331358909607},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.47237804532051086},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.41497617959976196},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2785848379135132},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09298580884933472},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08580282330513}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.823104739189148},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7700570821762085},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6419687867164612},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.49777892231941223},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.49223557114601135},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4851866364479065},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.47920331358909607},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.47237804532051086},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.41497617959976196},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2785848379135132},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09298580884933472},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08580282330513}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2016.7840748","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840748","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"mag:2785542991","is_oa":false,"landing_page_url":"http://jglobal.jst.go.jp/en/public/20090422/201702280560497846","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W129448004","https://openalex.org/W195157153","https://openalex.org/W1482031469","https://openalex.org/W1515300461","https://openalex.org/W1543225912","https://openalex.org/W1575999478","https://openalex.org/W1597864774","https://openalex.org/W1858703999","https://openalex.org/W1976306141","https://openalex.org/W1988545508","https://openalex.org/W1994326726","https://openalex.org/W2062317395","https://openalex.org/W2105500738","https://openalex.org/W2113174917","https://openalex.org/W2133986470","https://openalex.org/W2288790589","https://openalex.org/W2306526421","https://openalex.org/W2345143572","https://openalex.org/W2503342965","https://openalex.org/W2528356728","https://openalex.org/W6605439808","https://openalex.org/W6607795440","https://openalex.org/W6632677431","https://openalex.org/W6634429445","https://openalex.org/W6635751388","https://openalex.org/W6665967140","https://openalex.org/W6675794220","https://openalex.org/W6698093529"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4247566972","https://openalex.org/W4394895745","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W4200136508","https://openalex.org/W2499527417"],"abstract_inverted_index":{"Provenance":[0],"is":[1,17,31,167,185,191,206,211],"information":[2,16,133],"about":[3],"the":[4,35,38,125,138,142,188,196,204,215],"origin":[5],"and":[6,13,19,58,74,112,116,147,156,177,209],"creation":[7],"of":[8,25,40,100,219],"data.":[9,42],"In":[10,23,43,67,163],"data":[11,30,44,73,83,95,102,115,120,216],"science":[12],"engineering,":[14],"such":[15],"useful":[18],"sometimes":[20],"even":[21,98],"critical.":[22],"spite":[24],"that,":[26],"provenance":[27],"for":[28,63,81,109],"big":[29,41,82,101],"under-explored":[32],"due":[33],"to":[34,48,71,93,105,124,159],"challenges":[36],"from":[37,137],"`Vs'":[39],"analytics,":[45],"users":[46,69],"need":[47,70],"query":[49,161,197],"history,":[50],"reproduce":[51],"intermediate":[52],"or":[53,97],"final":[54],"results,":[55],"tune":[56],"models,":[57],"adjust":[59],"parameters":[60],"in":[61,153,169],"runtime":[62],"making":[64],"data-driven":[65],"decisions.":[66],"addition,":[68],"evaluate":[72],"pipeline":[75,110,130],"trustworthiness.":[76],"Towards":[77],"realising":[78],"these":[79],"functionalities":[80],"provenance,":[84],"we":[85],"propose":[86],"a":[87,170,175],"solution,":[88],"called":[89],"LogProv,":[90],"which":[91,127],"needs":[92],"renovate":[94],"pipelines":[96],"some":[99],"software":[103],"infrastructure":[104],"generate":[106],"structured":[107,148],"logs":[108,117,139,143],"events,":[111],"then":[113],"stores":[114],"separately.":[118],"The":[119,180],"are":[121,144],"explicitly":[122],"linked":[123],"logs,":[126],"implicitly":[128],"record":[129],"semantics.":[131],"Semantic":[132],"can":[134,198],"be":[135,199],"retrieved":[136],"easily":[140],"since":[141,187],"well":[145],"defined":[146],"beforehand.":[149],"We":[150],"implemented":[151],"LogProv":[152,166,184],"Apache":[154],"Pig,":[155],"adopted":[157],"ElasticSearch":[158],"provide":[160],"service.":[162],"this":[164],"paper":[165],"evaluated":[168],"Hadoop":[171],"ecosystem":[172],"hosted":[173],"by":[174],"cloud":[176],"empirically":[178],"case-studied.":[179],"results":[181],"show":[182],"that":[183],"efficient":[186],"performance":[189],"overhead":[190],"no":[192,212],"more":[193],"than":[194],"10%,":[195],"responded":[200],"within":[201],"1":[202],"second,":[203],"trustworthiness":[205],"marked":[207],"clearly,":[208],"there":[210],"impact":[213],"on":[214],"processing":[217],"logic":[218],"original":[220],"pipelines.":[221]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":5},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
