{"id":"https://openalex.org/W7152457262","doi":"https://doi.org/10.48550/arxiv.2604.06566","title":"AI-Driven Research for Databases","display_name":"AI-Driven Research for Databases","publication_year":2026,"publication_date":"2026-04-08","ids":{"openalex":"https://openalex.org/W7152457262","doi":"https://doi.org/10.48550/arxiv.2604.06566"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06566","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06566","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06566","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133239395","display_name":"Audrey Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cheng, Audrey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028208972","display_name":"Harald Ng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ng, Harald","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017609725","display_name":"Aaron Kabcenell","orcid":"https://orcid.org/0000-0001-8951-3360"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kabcenell, Aaron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002538469","display_name":"Peter Bailis","orcid":"https://orcid.org/0000-0003-1166-7823"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bailis, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133294040","display_name":"Matei Zaharia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaharia, Matei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133270313","display_name":"Lin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133277706","display_name":"Xiao Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133309339","display_name":"Ion Stoica","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stoica, Ion","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5133239395"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.6104999780654907,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.6104999780654907,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.23469999432563782,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.01489999983459711,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8064000010490417},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5755000114440918},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4778999984264374},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.462799996137619},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.38019999861717224},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.36000001430511475}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8371999859809875},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8064000010490417},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5755000114440918},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4778999984264374},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.462799996137619},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.45010000467300415},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4323999881744385},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.38019999861717224},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32280001044273376},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2732999920845032},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C2989070954","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database query","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06566","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06566","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06566","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06566","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"the":[1,68,108,119,127,131,152,177,181],"complexity":[2],"of":[3,32,77,111,121,133,154,183],"modern":[4],"workloads":[5],"and":[6,12,86,145],"hardware":[7],"increasingly":[8],"outpaces":[9],"human":[10,80],"research":[11],"engineering":[13],"capacity,":[14],"existing":[15],"methods":[16],"for":[17,37,102,191],"database":[18,104],"performance":[19],"optimization":[20,51],"struggle":[21],"to":[22,44,56,91,170,185],"keep":[23],"pace.":[24],"To":[25,106],"address":[26],"this":[27,114,134],"gap,":[28],"a":[29,162],"new":[30],"class":[31],"techniques,":[33],"termed":[34],"AI-Driven":[35],"Research":[36],"Systems":[38],"(ADRS),":[39],"uses":[40],"large":[41],"language":[42],"models":[43],"automate":[45],"solution":[46],"discovery.":[47],"This":[48],"approach":[49,135],"shifts":[50],"from":[52,89],"manual":[53],"system":[54],"design":[55,120],"automated":[57,149],"code":[58,190],"generation.":[59],"The":[60],"key":[61],"obstacle,":[62],"however,":[63],"in":[64,113],"applying":[65],"ADRS":[66,112,184],"is":[67,99],"evaluation":[69,178],"pipeline.":[70],"Since":[71],"these":[72],"frameworks":[73],"rapidly":[74],"generate":[75,186],"hundreds":[76],"candidates":[78],"without":[79],"supervision,":[81],"they":[82],"depend":[83],"on":[84,93],"fast":[85],"accurate":[87],"feedback":[88],"evaluators":[90,98,122,150],"converge":[92],"effective":[94],"solutions.":[95,128],"Building":[96],"such":[97],"especially":[100],"difficult":[101],"complex":[103],"systems.":[105,194],"enable":[107,151],"practical":[109],"application":[110],"domain,":[115],"we":[116],"propose":[117],"automating":[118],"by":[123],"co-evolving":[124],"them":[125],"with":[126],"We":[129],"demonstrate":[130],"effectiveness":[132],"through":[136],"three":[137],"case":[138],"studies":[139],"optimizing":[140],"buffer":[141],"management,":[142],"query":[143,164],"rewriting,":[144],"index":[146],"selection.":[147],"Our":[148],"discovery":[153],"novel":[155],"algorithms":[156],"that":[157,167,175],"outperform":[158],"state-of-the-art":[159],"baselines":[160],"(e.g.,":[161],"deterministic":[163],"rewrite":[165],"policy":[166],"achieves":[168],"up":[169],"6.8x":[171],"lower":[172],"latency),":[173],"demonstrating":[174],"addressing":[176],"bottleneck":[179],"unlocks":[180],"potential":[182],"highly":[187],"optimized,":[188],"deployable":[189],"next-generation":[192],"data":[193]},"counts_by_year":[],"updated_date":"2026-04-10T06:07:51.998497","created_date":"2026-04-10T00:00:00"}
