{"id":"https://openalex.org/W7141597993","doi":"https://doi.org/10.48550/arxiv.2603.25568","title":"Are LLMs Overkill for Databases?: A Study on the Finiteness of SQL","display_name":"Are LLMs Overkill for Databases?: A Study on the Finiteness of SQL","publication_year":2026,"publication_date":"2026-03-26","ids":{"openalex":"https://openalex.org/W7141597993","doi":"https://doi.org/10.48550/arxiv.2603.25568"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.25568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.25568","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130763241","display_name":"Yue Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086934220","display_name":"David Mimno","orcid":"https://orcid.org/0000-0001-7510-9404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mimno, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130731689","display_name":"Unso Eun Seo Jo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jo, Unso Eun Seo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.0860000029206276,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.770799994468689},{"id":"https://openalex.org/keywords/data-definition-language","display_name":"Data definition language","score":0.6660000085830688},{"id":"https://openalex.org/keywords/stored-procedure","display_name":"Stored procedure","score":0.5893999934196472},{"id":"https://openalex.org/keywords/null","display_name":"Null (SQL)","score":0.5436999797821045},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.49140000343322754},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4690000116825104},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.453900009393692},{"id":"https://openalex.org/keywords/query-by-example","display_name":"Query by Example","score":0.37619999051094055}],"concepts":[{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.770799994468689},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7414000034332275},{"id":"https://openalex.org/C55596503","wikidata":"https://www.wikidata.org/wiki/Q1431648","display_name":"Data definition language","level":3,"score":0.6660000085830688},{"id":"https://openalex.org/C154420247","wikidata":"https://www.wikidata.org/wiki/Q846619","display_name":"Stored procedure","level":5,"score":0.5893999934196472},{"id":"https://openalex.org/C203763787","wikidata":"https://www.wikidata.org/wiki/Q371029","display_name":"Null (SQL)","level":2,"score":0.5436999797821045},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.49779999256134033},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.49140000343322754},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4690000116825104},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.453900009393692},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4465000033378601},{"id":"https://openalex.org/C194222762","wikidata":"https://www.wikidata.org/wiki/Q114486","display_name":"Query by Example","level":4,"score":0.37619999051094055},{"id":"https://openalex.org/C167544706","wikidata":"https://www.wikidata.org/wiki/Q360842","display_name":"SQL/PSM","level":5,"score":0.36419999599456787},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C179531526","wikidata":"https://www.wikidata.org/wiki/Q595637","display_name":"Language Integrated Query","level":5,"score":0.29649999737739563},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.287200003862381},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.259799987077713},{"id":"https://openalex.org/C123593499","wikidata":"https://www.wikidata.org/wiki/Q6008583","display_name":"In-Memory Processing","level":5,"score":0.2542000114917755},{"id":"https://openalex.org/C141589383","wikidata":"https://www.wikidata.org/wiki/Q644775","display_name":"Data Transformation Services","level":5,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.25568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.25568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Translating":[0],"natural":[1,60],"language":[2,61],"to":[3,13,22],"SQL":[4,24,55,85,91,122],"for":[5,131],"data":[6],"retrieval":[7],"has":[8],"become":[9,29],"more":[10],"accessible":[11],"thanks":[12],"code":[14,132],"generation":[15,133],"LLMs.":[16],"But":[17],"how":[18],"hard":[19],"is":[20,37,69],"it":[21],"generate":[23],"code?":[25],"While":[26],"databases":[27],"can":[28,106,134],"unbounded":[30],"in":[31,65,76,82,137,147],"complexity,":[32],"the":[33,118,138],"complexity":[34,83],"of":[35,49,59,84,98,102,112,121,140],"queries":[36,92,105,123],"bounded":[38],"by":[39],"real":[40],"life":[41],"utility":[42],"and":[43,80,159],"human":[44],"needs.":[45],"With":[46],"a":[47,94,148],"sample":[48],"376":[50],"databases,":[51],"we":[52],"show":[53],"that":[54,117,128],"queries,":[56],"as":[57],"translations":[58],"questions":[62],"are":[63,124],"finite":[64],"practical":[66],"complexity.":[67],"There":[68],"no":[70],"clear":[71],"monotonic":[72],"relationship":[73],"between":[74],"increases":[75,81],"database":[77,141],"table":[78],"count":[79],"queries.":[86],"In":[87],"their":[88],"template":[89,114],"forms,":[90],"follow":[93],"Power":[95],"Law-like":[96],"distribution":[97],"frequency":[99],"where":[100,153],"70%":[101],"our":[103],"tested":[104],"be":[107,135,145,156],"covered":[108],"with":[109],"just":[110],"13%":[111],"all":[113],"types,":[115],"indicating":[116],"high":[119],"majority":[120],"predictable.":[125],"This":[126],"suggests":[127],"while":[129],"LLMs":[130],"useful,":[136],"domain":[139],"access,":[142],"they":[143],"may":[144],"operating":[146],"narrow,":[149],"highly":[150],"formulaic":[151],"space":[152],"templates":[154],"could":[155],"safer,":[157],"cheaper,":[158],"auditable.":[160]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-28T00:00:00"}
