{"id":"https://openalex.org/W7118486672","doi":"https://doi.org/10.48550/arxiv.2601.00995","title":"Grain-Aware Data Transformations: Type-Level Formal Verification at Zero Computational Cost","display_name":"Grain-Aware Data Transformations: Type-Level Formal Verification at Zero Computational Cost","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7118486672","doi":"https://doi.org/10.48550/arxiv.2601.00995"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00995","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058121165","display_name":"Nikos Karayannidis","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karayannidis, Nikos","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5058121165"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.31619998812675476,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.31619998812675476,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.2831000089645386,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.11110000312328339,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.8066999912261963},{"id":"https://openalex.org/keywords/formal-verification","display_name":"Formal verification","score":0.5830000042915344},{"id":"https://openalex.org/keywords/mathematical-proof","display_name":"Mathematical proof","score":0.43939998745918274},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.428600013256073},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4097999930381775},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.40230000019073486},{"id":"https://openalex.org/keywords/data-verification","display_name":"Data verification","score":0.4018999934196472},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.38119998574256897},{"id":"https://openalex.org/keywords/proof-assistant","display_name":"Proof assistant","score":0.3587000072002411}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.8066999912261963},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7975999712944031},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.5830000042915344},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5070000290870667},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4772999882698059},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.43939998745918274},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4097999930381775},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C49895821","wikidata":"https://www.wikidata.org/wiki/Q5227368","display_name":"Data verification","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C203265346","wikidata":"https://www.wikidata.org/wiki/Q11387554","display_name":"Proof assistant","level":3,"score":0.3587000072002411},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.34700000286102295},{"id":"https://openalex.org/C202973057","wikidata":"https://www.wikidata.org/wiki/Q7380130","display_name":"Runtime verification","level":3,"score":0.3434000015258789},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C62460635","wikidata":"https://www.wikidata.org/wiki/Q5508853","display_name":"Functional verification","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.3370000123977661},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3287000060081482},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3028999865055084},{"id":"https://openalex.org/C150670458","wikidata":"https://www.wikidata.org/wiki/Q4272815","display_name":"Data transformation","level":3,"score":0.3003000020980835},{"id":"https://openalex.org/C33054407","wikidata":"https://www.wikidata.org/wiki/Q6504747","display_name":"Software verification","level":5,"score":0.2912999987602234},{"id":"https://openalex.org/C116253237","wikidata":"https://www.wikidata.org/wiki/Q1437424","display_name":"Formal specification","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C159718280","wikidata":"https://www.wikidata.org/wiki/Q5526353","display_name":"Gas meter prover","level":3,"score":0.2727999985218048},{"id":"https://openalex.org/C206880738","wikidata":"https://www.wikidata.org/wiki/Q431667","display_name":"Automated theorem proving","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C2779791154","wikidata":"https://www.wikidata.org/wiki/Q258040","display_name":"Model transformation","level":3,"score":0.25870001316070557}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"transformation":[1,97],"correctness":[2,169,221],"is":[3],"a":[4,78,113],"major":[5],"challenge":[6],"in":[7,74,206,236],"data":[8,23,39,85,184,199,237],"engineering:":[9],"how":[10],"to":[11,30,36,77,83,162,229],"verify":[12,163],"pipeline":[13,166],"accuracy":[14],"before":[15,183],"deployment.":[16],"Traditional":[17],"methods":[18,235],"involve":[19],"costly":[20],"iterative":[21],"testing,":[22],"materialization,":[24],"and":[25,54,110,138,180,239],"manual":[26],"error":[27],"detection,":[28],"due":[29],"the":[31,61,90,120,188],"lack":[32],"of":[33,66,96,123,190,243],"formal":[34,204,234],"approaches":[35],"reasoning":[37],"about":[38],"granularity":[40],"(grain),":[41],"which":[42],"can":[43,218],"shift":[44],"during":[45],"transformations,":[46],"causing":[47],"issues":[48,182],"like":[49],"fan":[50,176],"traps":[51,56],"(metrics":[52],"duplication)":[53],"chasm":[55,178],"(data":[57],"loss).":[58],"We":[59,103,201],"introduce":[60],"first":[62],"formal,":[63],"mathematical":[64],"definition":[65],"grain,":[67,191],"extending":[68],"it":[69],"from":[70,101,125,226],"an":[71],"informal":[72],"concept":[73],"dimensional":[75],"modeling":[76],"universal,":[79],"type-theoretic":[80],"framework":[81],"applicable":[82],"any":[84],"type.":[86],"Encoding":[87],"grain":[88,107,115,122],"into":[89],"type":[91],"system":[92],"allows":[93,160],"compile-time":[94],"verification":[95,150,210],"correctness,":[98],"shifting":[99,223],"validation":[100],"runtime.":[102],"define":[104],"three":[105],"core":[106],"relations-equality,":[108],"ordering,":[109],"incomparability-and":[111],"prove":[112],"general":[114],"inference":[116,143],"theorem":[117],"that":[118,164],"computes":[119],"output":[121],"equi-joins":[124],"input":[126],"grains":[127],"using":[128],"type-level":[129],"operations.":[130],"This":[131],"covers":[132],"all":[133,198],"join":[134],"scenarios,":[135],"including":[136],"comparable":[137],"incomparable":[139],"keys.":[140],"Together":[141],"with":[142,246],"rules":[144],"for":[145],"relational":[146],"operations,":[147],"this":[148],"enables":[149],"through":[151],"schema":[152],"analysis":[153],"alone,":[154],"at":[155],"zero":[156],"cost.":[157],"Our":[158],"approach":[159],"engineers":[161],"entire":[165],"DAGs":[167],"maintain":[168],"properties,":[170],"detecting":[171],"grain-related":[172],"errors":[173],"such":[174],"as":[175],"traps,":[177,179],"aggregation":[181],"processing.":[185],"It":[186],"emphasizes":[187],"importance":[189],"focusing":[192],"on":[193],"critical":[194],"characteristics":[195],"rather":[196],"than":[197],"details.":[200],"provide":[202],"machine-checked":[203],"proofs":[205],"Lean":[207],"4,":[208],"reducing":[209],"costs":[211],"by":[212],"98-99%.":[213],"Additionally,":[214],"large":[215],"language":[216],"models":[217],"automatically":[219],"generate":[220],"proofs,":[222],"human":[224],"effort":[225],"proof":[227,230],"writing":[228],"verification,":[231],"thus":[232],"democratizing":[233],"engineering":[238],"supporting":[240],"confident":[241],"deployment":[242],"AI-generated":[244],"pipelines":[245],"machine-checkable":[247],"guarantees.":[248]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
