{"id":"https://openalex.org/W7155511890","doi":"https://doi.org/10.48550/arxiv.2604.21413","title":"RUBICON: Agentic AI for Messy Enterprise Data","display_name":"RUBICON: Agentic AI for Messy Enterprise Data","publication_year":2026,"publication_date":"2026-04-23","ids":{"openalex":"https://openalex.org/W7155511890","doi":"https://doi.org/10.48550/arxiv.2604.21413"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.21413","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106066935","display_name":"Fabian Wenz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenz, Fabian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134547376","display_name":"Felix Treutwein","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Treutwein, Felix","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134554963","display_name":"Kai Arenja","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arenja, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027360874","display_name":"\u00c7a\u011fatay Demiralp","orcid":"https://orcid.org/0009-0003-2080-0443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Demiralp, \u00c7agatay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134505531","display_name":"Michael Stonebraker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stonebraker, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.3160000145435333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.3160000145435333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.20679999887943268,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.17880000174045563,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6732000112533569},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.61080002784729},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.567300021648407},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4684000015258789},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4147999882698059},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.39469999074935913},{"id":"https://openalex.org/keywords/enterprise-information-system","display_name":"Enterprise information system","score":0.36469998955726624},{"id":"https://openalex.org/keywords/view","display_name":"View","score":0.33480000495910645},{"id":"https://openalex.org/keywords/datalog","display_name":"Datalog","score":0.3328000009059906}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.794700026512146},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6732000112533569},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.61080002784729},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.567300021648407},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4684000015258789},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4147999882698059},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C27295321","wikidata":"https://www.wikidata.org/wiki/Q831795","display_name":"Enterprise information system","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.353300005197525},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3508000075817108},{"id":"https://openalex.org/C54239708","wikidata":"https://www.wikidata.org/wiki/Q1329910","display_name":"View","level":3,"score":0.33480000495910645},{"id":"https://openalex.org/C148230440","wikidata":"https://www.wikidata.org/wiki/Q1172264","display_name":"Datalog","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3240000009536743},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29420000314712524},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29269999265670776},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C10590034","wikidata":"https://www.wikidata.org/wiki/Q1048431","display_name":"Enterprise architecture","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C102993220","wikidata":"https://www.wikidata.org/wiki/Q387196","display_name":"Description logic","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C67571701","wikidata":"https://www.wikidata.org/wiki/Q1318054","display_name":"Enterprise system","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C2776543384","wikidata":"https://www.wikidata.org/wiki/Q593289","display_name":"Information access","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.2565000057220459},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C105446022","wikidata":"https://www.wikidata.org/wiki/Q445962","display_name":"Legacy system","level":3,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Enterprise":[0],"data":[1,125,136,234],"exists":[2],"in":[3],"many":[4],"forms,":[5],"such":[6],"as":[7,64,147],"tables,":[8],"text,":[9],"maps,":[10],"e-mail,":[11],"and":[12,18,51,62,94,126,168,172,195,209,220],"CAD":[13],"models,":[14],"that":[15,71,109,227],"are":[16],"access-controlled":[17],"hidden":[19],"behind":[20],"bespoke":[21],"interfaces.":[22],"Current":[23],"agentic":[24,81,166,191],"AI":[25,82],"systems":[26],"delegate":[27],"the":[28,40,65,148],"entire":[29],"query":[30,89,102],"workflow":[31],"to":[32,131],"a":[33,36,87,95,101,107,228],"frontier":[34],"LLM:":[35],"single":[37],"model":[38],"interprets":[39],"request,":[41],"selects":[42],"sources":[43],"or":[44],"tools,":[45],"integrates":[46],"retrieved":[47],"evidence,":[48],"judges":[49],"completeness,":[50],"generates":[52],"an":[53,74],"answer,":[54],"with":[55],"few":[56],"constraints,":[57],"limited":[58],"use":[59],"of":[60],"schemas,":[61],"text":[63],"primary":[66],"representation":[67],"throughout.":[68],"We":[69,104,156],"argue":[70],"this":[72,111],"is":[73,114,142],"ineffective":[75],"abstraction":[76,150],"for":[77],"enterprise":[78,124,182,233],"data.":[79],"Reliable":[80],"should":[83],"instead":[84],"require":[85,178],"structure:":[86],"constrained":[88],"interface":[90],"over":[91],"each":[92],"source":[93],"table-centric":[96,229],"integration":[97,137],"layer":[98],"driven":[99],"by":[100,218,224],"processor.":[103],"introduce":[105],"RUBICON,":[106],"system":[108],"embodies":[110],"vision.":[112],"RUBICON":[113,158,184,205],"based":[115],"on":[116,122,159],"two":[117,160],"observations.":[118],"First,":[119],"text-to-SQL":[120],"fails":[121],"real":[123],"must":[127],"be":[128],"dramatically":[129],"subsetted":[130],"achieve":[132],"reliable":[133],"results.":[134],"Second,":[135],"across":[138,180],"disparate":[139],"corporate":[140],"datasets":[141],"best":[143],"performed":[144],"using":[145],"tables":[146],"core":[149],"rather":[151],"than":[152],"text-centric":[153],"LLM":[154],"pipelines.":[155],"evaluate":[157],"benchmarks:":[161],"our":[162],"enterprise-focused":[163],"RUBICON-Bench,":[164,175],"against":[165,170],"baselines,":[167,192],"SemBench,":[169,204],"LOTUS":[171,208],"Palimpzest.":[173],"On":[174,203],"where":[176],"queries":[177],"coordination":[179],"heterogeneous":[181],"sources,":[183],"achieves":[185,212],"100%":[186],"end-to-end":[187],"accuracy,":[188,215],"while":[189,235],"all":[190],"including":[193],"single-":[194],"multi-agent":[196],"ReAct":[197],"systems,":[198],"produce":[199],"no":[200],"correct":[201],"answers.":[202],"surpasses":[206],"both":[207],"Palimpzest:":[210],"it":[211],"14.7%":[213],"higher":[214],"reduces":[216],"latency":[217],"62.64%,":[219],"lowers":[221],"token":[222],"cost":[223],"98.64%,":[225],"demonstrating":[226],"architecture":[230],"better":[231],"matches":[232],"yielding":[236],"significant":[237],"efficiency":[238],"gains.":[239]},"counts_by_year":[],"updated_date":"2026-06-12T06:20:11.936012","created_date":"2026-04-25T00:00:00"}
