{"id":"https://openalex.org/W4415428110","doi":"https://doi.org/10.3233/faia251337","title":"CRED-SQL: Enhancing Real-World Large Scale Database Text-to-SQL Parsing Through Cluster Retrieval and Execution Description","display_name":"CRED-SQL: Enhancing Real-World Large Scale Database Text-to-SQL Parsing Through Cluster Retrieval and Execution Description","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428110","doi":"https://doi.org/10.3233/faia251337"},"language":null,"primary_location":{"id":"doi:10.3233/faia251337","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251337","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251337","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086035676","display_name":"Shaoming Duan","orcid":"https://orcid.org/0000-0002-7546-9562"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shaoming Duan","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Pengcheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687843","display_name":"Zirui Wang","orcid":"https://orcid.org/0000-0002-5399-3179"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zirui Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103171964","display_name":"Chuanyi Liu","orcid":"https://orcid.org/0000-0002-9846-9709"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuanyi Liu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Pengcheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103116260","display_name":"Zhibin Zhu","orcid":"https://orcid.org/0000-0002-1478-2520"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibin Zhu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100349339","display_name":"Yuhao Zhang","orcid":"https://orcid.org/0000-0001-7325-6507"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210118493","display_name":"Mind","ror":"https://ror.org/01ygx6877","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210118493"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Yuhao Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Mindflow.ai"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Mindflow.ai","institution_ids":["https://openalex.org/I4210118493"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101992266","display_name":"Peiyi Han","orcid":"https://orcid.org/0000-0003-0417-4473"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peiyi Han","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Pengcheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104000120","display_name":"Liang Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210144143","display_name":"Inspur (China)","ror":"https://ror.org/0474p4r72","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210144143"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Yan","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Inspur Cloud Information Technology Co., Ltd, Jinan 250101, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Inspur Cloud Information Technology Co., Ltd, Jinan 250101, China","institution_ids":["https://openalex.org/I4210144143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008907550","display_name":"Zewu Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I74872605","display_name":"China Southern Power Grid (China)","ror":"https://ror.org/03hkh9419","country_code":"CN","type":"company","lineage":["https://openalex.org/I74872605"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zewu Peng","raw_affiliation_strings":["Guangdong Power Grid Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Power Grid Co., Ltd, China","institution_ids":["https://openalex.org/I74872605"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5086035676"],"corresponding_institution_ids":["https://openalex.org/I204983213","https://openalex.org/I4210136793"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.6772137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.902400016784668,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.902400016784668,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.6317999958992004},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.611299991607666},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.47679999470710754},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4733999967575073},{"id":"https://openalex.org/keywords/database-schema","display_name":"Database schema","score":0.4722000062465668},{"id":"https://openalex.org/keywords/data-definition-language","display_name":"Data definition language","score":0.414900004863739},{"id":"https://openalex.org/keywords/natural-language-user-interface","display_name":"Natural language user interface","score":0.35850000381469727},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.3102000057697296},{"id":"https://openalex.org/keywords/data-control-language","display_name":"Data control language","score":0.30790001153945923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8709999918937683},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.6317999958992004},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.611299991607666},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.47679999470710754},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C30775581","wikidata":"https://www.wikidata.org/wiki/Q632285","display_name":"Database schema","level":3,"score":0.4722000062465668},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4611999988555908},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44200000166893005},{"id":"https://openalex.org/C55596503","wikidata":"https://www.wikidata.org/wiki/Q1431648","display_name":"Data definition language","level":3,"score":0.414900004863739},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3833000063896179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38029998540878296},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.35850000381469727},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C32977378","wikidata":"https://www.wikidata.org/wiki/Q604737","display_name":"Data control language","level":5,"score":0.30790001153945923},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3075999915599823},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.289900004863739},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.28610000014305115},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C29275276","wikidata":"https://www.wikidata.org/wiki/Q2268965","display_name":"Conceptual schema","level":3,"score":0.2775000035762787},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251337","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251337","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251337","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251337","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,37],"large":[3],"language":[4,25,105],"models":[5],"(LLMs)":[6],"have":[7],"significantly":[8],"improved":[9],"the":[10,20,86,111,120],"accuracy":[11],"of":[12],"Text-to-SQL":[13],"systems.":[14],"However,":[15],"a":[16,64,93],"critical":[17],"challenge":[18],"remains:":[19],"semantic":[21,48,136],"mismatch":[22],"between":[23,113],"natural":[24,104],"questions":[26],"(NLQs)":[27],"and":[28,47,74,88,115,126,145,157],"their":[29],"corresponding":[30],"SQL":[31,51],"queries.":[32],"This":[33,117],"issue":[34],"is":[35,161],"exacerbated":[36],"large-scale":[38,68,81],"databases,":[39],"where":[40],"semantically":[41],"similar":[42],"attributes":[43],"hinder":[44],"schema":[45,82,97],"linking":[46],"drift":[49],"during":[50],"generation,":[52],"ultimately":[53],"reducing":[54,135],"model":[55],"accuracy.":[56],"To":[57],"address":[58],"these":[59],"challenges,":[60],"we":[61],"introduce":[62],"CRED-SQL,":[63],"framework":[65],"designed":[66],"for":[67],"databases":[69],"that":[70,147],"integrates":[71],"Cluster":[72],"Retrieval":[73],"Execution":[75],"Description.":[76],"CRED-SQL":[77,148],"first":[78],"performs":[79],"cluster-based":[80],"retrieval":[83],"to":[84,92],"pinpoint":[85],"tables":[87],"columns":[89],"most":[90],"relevant":[91],"given":[94],"NLQ,":[95],"alleviating":[96],"mismatch.":[98],"It":[99],"then":[100],"introduces":[101],"an":[102],"intermediate":[103],"representation\u2014Execution":[106],"Description":[107],"Language":[108],"(EDL)\u2014to":[109],"bridge":[110],"gap":[112],"NLQs":[114],"SQL.":[116],"reformulation":[118],"decomposes":[119],"task":[121],"into":[122],"two":[123,141],"stages:":[124],"Text-to-EDL":[125],"EDL-to-SQL,":[127],"leveraging":[128],"LLMs\u2019":[129],"strong":[130],"general":[131],"reasoning":[132],"capabilities":[133],"while":[134],"deviation.":[137],"Extensive":[138],"experiments":[139],"on":[140],"large-scale,":[142],"cross-domain":[143],"benchmarks\u2014SpiderUnion":[144],"BirdUnion\u2014demonstrate":[146],"achieves":[149],"new":[150],"state-of-the-art":[151],"(SOTA)":[152],"performance,":[153],"validating":[154],"its":[155],"effectiveness":[156],"scalability.":[158],"Our":[159],"code":[160],"available":[162],"at":[163],"https://github.com/smduan/CRED-SQL.git":[164]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
