{"id":"https://openalex.org/W4281684287","doi":"https://doi.org/10.1145/3520313.3534657","title":"Statically detecting data leakages in data science code","display_name":"Statically detecting data leakages in data science code","publication_year":2022,"publication_date":"2022-06-09","ids":{"openalex":"https://openalex.org/W4281684287","doi":"https://doi.org/10.1145/3520313.3534657"},"language":"en","primary_location":{"id":"doi:10.1145/3520313.3534657","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3520313.3534657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 11th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009881146","display_name":"Pavle Suboti\u0107","orcid":"https://orcid.org/0000-0002-6536-3932"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pavle Suboti\u0107","raw_affiliation_strings":["Microsoft, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Serbia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027061112","display_name":"Uro\u0161 Bojani\u0107","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Uro\u0161 Bojani\u0107","raw_affiliation_strings":["Microsoft, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Serbia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046825007","display_name":"Milan Stoji\u0107","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Milan Stoji\u0107","raw_affiliation_strings":["Microsoft, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Serbia","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009881146"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8915,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.71245576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"16","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7995068430900574},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7753146886825562},{"id":"https://openalex.org/keywords/static-analysis","display_name":"Static analysis","score":0.5446736812591553},{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.48822954297065735},{"id":"https://openalex.org/keywords/real-world-data","display_name":"Real world data","score":0.4781312048435211},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4534483253955841},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43845197558403015},{"id":"https://openalex.org/keywords/information-leakage","display_name":"Information leakage","score":0.4218409061431885},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4194382429122925},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22780853509902954},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.22120001912117004},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.18227988481521606},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18138906359672546},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.11192625761032104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7995068430900574},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7753146886825562},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.5446736812591553},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.48822954297065735},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.4781312048435211},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4534483253955841},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43845197558403015},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.4218409061431885},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4194382429122925},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22780853509902954},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22120001912117004},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.18227988481521606},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18138906359672546},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.11192625761032104},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3520313.3534657","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3520313.3534657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 11th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1503248989","https://openalex.org/W2043100293","https://openalex.org/W2044879407","https://openalex.org/W2046213250","https://openalex.org/W2064617810","https://openalex.org/W2797273612","https://openalex.org/W2978700448","https://openalex.org/W3082494217","https://openalex.org/W3110358918","https://openalex.org/W3130391092","https://openalex.org/W3207511826","https://openalex.org/W4284677512"],"related_works":["https://openalex.org/W2151320244","https://openalex.org/W4213243744","https://openalex.org/W1570829627","https://openalex.org/W2295023886","https://openalex.org/W2969365378","https://openalex.org/W3123987581","https://openalex.org/W2358406440","https://openalex.org/W2352435628","https://openalex.org/W151758239","https://openalex.org/W3175365978"],"abstract_inverted_index":{"Data":[0,9],"leakage":[1,10,70],"is":[2,19,84],"a":[3,23,28,67],"well-known":[4],"problem":[5],"in":[6,35],"machine":[7],"learning.":[8],"occurs":[11,57],"when":[12],"information":[13],"from":[14],"outside":[15],"the":[16,36,40,47,106,120,127],"training":[17],"dataset":[18],"used":[20],"to":[21,43,72,86],"create":[22],"model.":[24],"This":[25],"phenomenon":[26],"renders":[27],"model":[29,41],"excessively":[30],"optimistic":[31],"or":[32],"even":[33],"useless":[34],"real":[37,116],"world":[38,117],"since":[39],"tends":[42],"leverage":[44],"greatly":[45],"on":[46,115],"unfairly":[48],"acquired":[49],"information.":[50],"To":[51,119],"date,":[52],"detection":[53,130],"of":[54,76,122,131],"data":[55,69,77,97,132,134],"leakages":[56,78],"post-mortem":[58],"using":[59],"runtime":[60],"methods.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65,125],"develop":[66],"static":[68,108,129],"analysis":[71,83,104],"detect":[73],"several":[74],"instances":[75],"during":[79],"development":[80],"time.":[81],"Our":[82],"constructed":[85],"be":[87,93],"lightweight":[88],"so":[89],"that":[90],"it":[91],"can":[92],"performed":[94],"within":[95],"interactive":[96],"science":[98,133],"notebooks.":[99],"We":[100],"have":[101],"integrated":[102],"our":[103,123],"into":[105],"NBLyzer":[107],"analyzer":[109],"framework":[110],"and":[111],"show":[112],"its":[113],"utility":[114],"benchmarks.":[118],"best":[121],"knowledge,":[124],"propose":[126],"first":[128],"leakages.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
