{"id":"https://openalex.org/W4379522334","doi":"https://doi.org/10.1145/3589250.3596145","title":"Static Analysis of Data Transformations in Jupyter Notebooks","display_name":"Static Analysis of Data Transformations in Jupyter Notebooks","publication_year":2023,"publication_date":"2023-06-06","ids":{"openalex":"https://openalex.org/W4379522334","doi":"https://doi.org/10.1145/3589250.3596145"},"language":"en","primary_location":{"id":"doi:10.1145/3589250.3596145","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589250.3596145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-04249950v1/document","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004173744","display_name":"Luca Negrini","orcid":"https://orcid.org/0000-0001-9930-8854"},"institutions":[{"id":"https://openalex.org/I4210106247","display_name":"Corvallis Environmental Center","ror":"https://ror.org/01kdb6822","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210106247"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luca Negrini","raw_affiliation_strings":["Corvallis, Italy","Corvallis Srl (Italy)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Corvallis, Italy","institution_ids":["https://openalex.org/I4210106247"]},{"raw_affiliation_string":"Corvallis Srl (Italy)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092098736","display_name":"Guruprerana Shabadi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145102","display_name":"Institut Polytechnique de Paris","ror":"https://ror.org/042tfbd02","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210145102"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Guruprerana Shabadi","raw_affiliation_strings":["\u00c9cole Polytechnique, France / Institut Polytechnique de Paris, France","IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"\u00c9cole Polytechnique, France / Institut Polytechnique de Paris, France","institution_ids":[]},{"raw_affiliation_string":"IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)","institution_ids":["https://openalex.org/I4210145102"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077439185","display_name":"Caterina Urban","orcid":"https://orcid.org/0000-0002-8127-9642"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Caterina Urban","raw_affiliation_strings":["Inria Paris, France / ENS, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inria Paris, France / ENS, France","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.4284,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.93271325,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.8503867387771606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8324571847915649},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.6756019592285156},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6726994514465332},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.5205619931221008},{"id":"https://openalex.org/keywords/static-analysis","display_name":"Static analysis","score":0.49163180589675903},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4435290992259979},{"id":"https://openalex.org/keywords/static-program-analysis","display_name":"Static program analysis","score":0.44055622816085815},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4179564118385315},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4174732565879822},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3868514597415924},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3448975384235382},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.2788746953010559},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.13629701733589172}],"concepts":[{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.8503867387771606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8324571847915649},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6756019592285156},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6726994514465332},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.5205619931221008},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.49163180589675903},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4435290992259979},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.44055622816085815},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4179564118385315},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4174732565879822},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3868514597415924},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3448975384235382},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2788746953010559},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.13629701733589172},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3589250.3596145","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589250.3596145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04249950v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-04249950","pdf_url":"https://inria.hal.science/hal-04249950v1/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"12th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis (SOAP 2023), Jun 2023, Orlando FL, United States. pp.8-13, &#x27E8;10.1145/3589250.3596145&#x27E9;","raw_type":"Conference papers"},{"id":"pmh:oai:iris.unive.it:10278/5026140","is_oa":true,"landing_page_url":"https://hdl.handle.net/10278/5026140","pdf_url":"https://iris.unive.it/bitstream/10278/5026140/1/preprint.pdf","source":{"id":"https://openalex.org/S4306402336","display_name":"ARCA (Universit\u00e0 Ca' Foscari Venezia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149461666","host_organization_name":"Ca' Foscari University of Venice","host_organization_lineage":["https://openalex.org/I149461666"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-04249950v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-04249950","pdf_url":"https://inria.hal.science/hal-04249950v1/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"12th ACM SIGPLAN International Workshop on the State Of the Art in Program Analysis (SOAP 2023), Jun 2023, Orlando FL, United States. pp.8-13, &#x27E8;10.1145/3589250.3596145&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[{"score":0.8600000143051147,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4379522334.pdf","grobid_xml":"https://content.openalex.org/works/W4379522334.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W2081810835","https://openalex.org/W2797273612","https://openalex.org/W3045754299","https://openalex.org/W3105684693","https://openalex.org/W3173410319","https://openalex.org/W3173683037","https://openalex.org/W3207511826","https://openalex.org/W4287237266","https://openalex.org/W4385223399"],"related_works":["https://openalex.org/W4388483122","https://openalex.org/W4387126921","https://openalex.org/W3030592833","https://openalex.org/W2354385412","https://openalex.org/W2032897247","https://openalex.org/W4297908618","https://openalex.org/W4245904369","https://openalex.org/W2985048382","https://openalex.org/W2765641823","https://openalex.org/W4386447154"],"abstract_inverted_index":{"Jupyter":[0],"notebooks":[1],"used":[2],"to":[3,18,27,36,74,105],"pre-process":[4],"and":[5,12,33],"polish":[6],"raw":[7],"data":[8,10],"for":[9,58],"science":[11],"machine":[13],"learning":[14],"processes":[15],"are":[16],"challenging":[17],"analyze.":[19],"Their":[20],"data-centric":[21],"code":[22],"manipulates":[23],"dataframes":[24,112],"through":[25],"call":[26],"library":[28],"functions":[29],"with":[30],"complex":[31],"semantics,":[32],"the":[34,44,67,75,91,107,111,115],"properties":[35,78],"track":[37],"over":[38],"it":[39],"vary":[40],"widely":[41],"depending":[42],"on":[43],"verification":[45],"task.":[46],"This":[47],"paper":[48],"presents":[49],"a":[50,63,97],"novel":[51],"abstract":[52],"domain":[53],"that":[54,69,87,101],"simplifies":[55],"writing":[56],"analyses":[57],"such":[59,85],"programs,":[60],"by":[61,83,114],"extracting":[62],"unique":[64],"CFG":[65],"from":[66],"notebook":[68],"contains":[70],"all":[71],"transformations":[72],"applied":[73],"data.":[76],"Several":[77],"can":[79],"then":[80],"be":[81],"determined":[82],"analyzing":[84],"CFG,":[86],"is":[88],"simpler":[89],"than":[90],"original":[92],"Python":[93],"code.":[94],"We":[95],"present":[96],"first":[98],"use":[99],"case":[100],"exploits":[102],"our":[103],"analysis":[104],"infer":[106],"required":[108],"shape":[109],"of":[110],"manipulated":[113],"notebook.":[116]},"counts_by_year":[{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-21T07:57:09.225873","created_date":"2025-10-10T00:00:00"}
