{"id":"https://openalex.org/W7154584108","doi":"https://doi.org/10.48550/arxiv.2604.13351","title":"Optimal Predicate Pushdown Synthesis","display_name":"Optimal Predicate Pushdown Synthesis","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7154584108","doi":"https://doi.org/10.48550/arxiv.2604.13351"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.13351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.13351","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069484796","display_name":"Robert Zhang","orcid":"https://orcid.org/0000-0002-7946-6094"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Robert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078345177","display_name":"Eric Hayden Campbell","orcid":"https://orcid.org/0000-0001-5954-2136"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Campbell, Eric Hayden","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110710845","display_name":"Dixin Tang","orcid":"https://orcid.org/0000-0002-3316-6651"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Dixin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5006424908","display_name":"I\u015f\u0131l Dillig","orcid":"https://orcid.org/0000-0001-8006-1230"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dillig, Isil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069484796"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.14800000190734863,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.14800000190734863,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.09929999709129333,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.08060000091791153,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedded-pushdown-automaton","display_name":"Embedded pushdown automaton","score":0.5353000164031982},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5123999714851379},{"id":"https://openalex.org/keywords/predicate","display_name":"Predicate (mathematical logic)","score":0.4426000118255615},{"id":"https://openalex.org/keywords/stateful-firewall","display_name":"Stateful firewall","score":0.36390000581741333},{"id":"https://openalex.org/keywords/predicate-transformer-semantics","display_name":"Predicate transformer semantics","score":0.32589998841285706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73089998960495},{"id":"https://openalex.org/C27034413","wikidata":"https://www.wikidata.org/wiki/Q5370010","display_name":"Embedded pushdown automaton","level":5,"score":0.5353000164031982},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5123999714851379},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.46059998869895935},{"id":"https://openalex.org/C140146324","wikidata":"https://www.wikidata.org/wiki/Q1144319","display_name":"Predicate (mathematical logic)","level":2,"score":0.4426000118255615},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4018999934196472},{"id":"https://openalex.org/C22927095","wikidata":"https://www.wikidata.org/wiki/Q1784206","display_name":"Stateful firewall","level":3,"score":0.36390000581741333},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3555000126361847},{"id":"https://openalex.org/C30128091","wikidata":"https://www.wikidata.org/wiki/Q291929","display_name":"Predicate transformer semantics","level":4,"score":0.32589998841285706},{"id":"https://openalex.org/C77944639","wikidata":"https://www.wikidata.org/wiki/Q751443","display_name":"Pushdown automaton","level":3,"score":0.31349998712539673},{"id":"https://openalex.org/C74919329","wikidata":"https://www.wikidata.org/wiki/Q378713","display_name":"Deterministic pushdown automaton","level":5,"score":0.29440000653266907},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2612999975681305}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.13351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.13351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Predicate":[0],"pushdown":[1,91,98,145,191,210],"is":[2,24,182],"a":[3,15,63,85,100,116,129,138,162,194],"long-standing":[4],"performance":[5,74],"optimization":[6],"that":[7,105,141,180,207],"filters":[8,66],"data":[9,20],"as":[10,12,42,99],"early":[11],"possible":[13],"in":[14,38,62,161],"computational":[16],"workflow.":[17],"In":[18],"modern":[19],"pipelines,":[21],"this":[22,125,159],"transformation":[23],"especially":[25],"important":[26],"because":[27],"much":[28],"of":[29,68,109,198,219,226],"the":[30,58,149,208],"computation":[31],"occurs":[32],"inside":[33],"user-defined":[34],"functions":[35],"(UDFs)":[36],"written":[37],"general-purpose":[39],"languages":[40],"such":[41,69],"Python":[43],"and":[44,52,55,131,153,166,173,221],"Scala.":[45],"These":[46],"UDFs":[47,70],"capture":[48],"rich":[49],"domain":[50],"logic":[51],"complex":[53],"aggregations":[54],"are":[56],"among":[57],"most":[59],"expensive":[60],"operations":[61],"pipeline.":[64],"Moving":[65],"ahead":[67],"can":[71],"yield":[72],"substantial":[73],"gains,":[75],"but":[76],"doing":[77],"so":[78],"requires":[79],"semantic":[80,87],"reasoning.":[81],"This":[82],"paper":[83],"introduces":[84],"general":[86],"foundation":[88],"for":[89,135],"predicate":[90],"over":[92],"stateful":[93],"fold-based":[94],"computations.":[95],"We":[96,157],"view":[97],"correspondence":[101],"between":[102],"two":[103,224],"programs":[104],"process":[106],"different":[107],"subsets":[108],"input":[110],"data,":[111],"with":[112,193],"correctness":[113],"witnessed":[114],"by":[115,147,216],"bisimulation":[117],"invariant":[118],"relating":[119],"their":[120],"internal":[121],"states.":[122],"Building":[123],"on":[124,169],"foundation,":[126],"we":[127],"develop":[128],"sound":[130],"relatively":[132],"complete":[133],"framework":[134],"verification,":[136],"alongside":[137],"synthesis":[139,196],"algorithm":[140],"automatically":[142],"constructs":[143],"optimal":[144,190],"decompositions":[146],"finding":[148],"strongest":[150],"admissible":[151],"pre-filters":[152],"weakest":[154],"residual":[155],"post-filters.":[156],"implement":[158],"approach":[160],"tool":[163],"called":[164],"Pusharoo":[165,181],"evaluate":[167],"it":[168],"150":[170],"real-world":[171],"pandas":[172],"Spark":[174],"data-processing":[175],"pipelines.":[176],"Our":[177],"evaluation":[178],"shows":[179],"significantly":[183],"more":[184],"expressive":[185],"than":[186],"prior":[187],"work,":[188],"producing":[189],"transformations":[192],"median":[195],"time":[197],"1.6":[199],"seconds":[200],"per":[201],"benchmark.":[202],"Furthermore,":[203],"our":[204],"experiments":[205],"demonstrate":[206],"discovered":[209],"optimizations":[211],"speed":[212],"up":[213,222],"end-to-end":[214],"execution":[215],"an":[217],"average":[218],"2.4$\\times$":[220],"to":[223],"orders":[225],"magnitude.":[227]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-17T00:00:00"}
