{"id":"https://openalex.org/W7162084908","doi":"https://doi.org/10.48550/arxiv.2605.21792","title":"Residual Skill Optimization for Text-to-SQL Ensembles","display_name":"Residual Skill Optimization for Text-to-SQL Ensembles","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7162084908","doi":"https://doi.org/10.48550/arxiv.2605.21792"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.21792","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21792","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.21792","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004156121","display_name":"Jiongli Zhu","orcid":"https://orcid.org/0000-0002-3238-8674"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jiongli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079955858","display_name":"Haoquan Guan","orcid":"https://orcid.org/0009-0007-5412-4489"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Haoquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082514869","display_name":"Parjanya Prashant","orcid":"https://orcid.org/0000-0003-1718-3037"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prashant, Parjanya Prajakta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030020226","display_name":"Nikki Lijing Kuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuang, Nikki Lijing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094196472","display_name":"Seyedeh Baharan Khatami","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khatami, Seyedeh Baharan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941860","display_name":"Canwen Xu","orcid":"https://orcid.org/0000-0002-1552-999X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Canwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136757820","display_name":"Xiaodong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Xiaodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136744340","display_name":"Yingyu Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Yingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102915910","display_name":"Zhewei Yao","orcid":"https://orcid.org/0000-0001-7678-4321"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Zhewei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136776181","display_name":"Yuxiong He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yuxiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103209063","display_name":"Babak Salimi","orcid":"https://orcid.org/0000-0003-2485-9533"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Salimi, Babak","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.22169999778270721,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.22169999778270721,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.14319999516010284,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.1298000067472458,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5443000197410583},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45100000500679016},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.3725000023841858},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.37119999527931213},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.34689998626708984},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.32899999618530273},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.3249000012874603},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.3183000087738037},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.2985000014305115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6161999702453613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5771999955177307},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5443000197410583},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5108000040054321},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45100000500679016},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.29840001463890076},{"id":"https://openalex.org/C178621042","wikidata":"https://www.wikidata.org/wiki/Q7631710","display_name":"Submodular set function","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29510000348091125},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29010000824928284},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.29010000824928284},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C2776145597","wikidata":"https://www.wikidata.org/wiki/Q25339462","display_name":"Dropout (neural networks)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C1921717","wikidata":"https://www.wikidata.org/wiki/Q1334846","display_name":"Mahalanobis distance","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.21792","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21792","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.21792","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21792","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8619197607040405}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-SQL":[0,62],"ensembles":[1,63],"improve":[2],"over":[3,104],"single-candidate":[4],"generation":[5],"by":[6,19,47,93],"drawing":[7],"multiple":[8],"SQL":[9],"candidates":[10,29],"and":[11,100,117,133,154],"selecting":[12],"one,":[13],"but":[14],"their":[15],"effectiveness":[16],"is":[17,30,70],"bounded":[18],"Pass@K,":[20],"the":[21,74,105],"probability":[22],"that":[23,58,158],"at":[24],"least":[25],"one":[26],"of":[27],"K":[28],"correct.":[31],"Existing":[32],"methods":[33],"source":[34],"diversity":[35],"heuristically":[36],"through":[37],"stochastic":[38],"decoding":[39],"or":[40],"prompt":[41],"variants,":[42],"leaving":[43],"candidate":[44],"sets":[45],"dominated":[46],"correlated":[48],"failures.":[49],"We":[50],"present":[51],"DivSkill-SQL,":[52],"a":[53,122,135],"residual":[54],"skill":[55,69,76],"optimization":[56],"framework":[57],"builds":[59],"complementary":[60,164],"agentic":[61],"without":[64,126],"model":[65],"fine-tuning:":[66],"each":[67],"new":[68],"optimized":[71,120],"on":[72,98,102,121],"examples":[73],"current":[75],"ensemble":[77,107],"fails":[78],"on,":[79],"provably":[80],"targeting":[81],"its":[82],"marginal":[83],"contribution":[84],"to":[85,95,134,148],"Pass@K.":[86],"On":[87],"Spider2-Lite,":[88],"DivSkill-SQL":[89],"improves":[90],"selected":[91],"accuracy":[92],"up":[94,147],"+11.1":[96],"points":[97],"Snowflake":[99],"+8.3":[101],"BigQuery":[103],"strongest":[106],"baseline,":[108],"with":[109],"consistent":[110],"gains":[111,159],"across":[112,128],"two":[113],"base":[114],"models":[115],"(Opus-4.6":[116],"GPT-5.4).":[118],"Skills":[119],"single":[123],"dialect":[124],"transfer":[125],"retraining":[127],"dialects":[129],"(Snowflake,":[130],"BigQuery,":[131],"SQLite)":[132],"different":[136],"task":[137],"formulation,":[138],"such":[139],"as":[140],"BIRD-Critic":[141],"(+2.6":[142],"pts).":[143],"Error":[144],"diagnostics":[145],"show":[146],"3x":[149],"fewer":[150],"hallucinated":[151],"schema":[152],"references":[153],"function":[155],"calls,":[156],"indicating":[157],"come":[160],"from":[161],"genuinely":[162],"reliable":[163],"skills":[165],"rather":[166],"than":[167],"surface-form":[168],"variation.":[169]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
