{"id":"https://openalex.org/W4411403466","doi":"https://doi.org/10.1145/3725278","title":"Cracking SQL Barriers: An LLM-based Dialect Translation System","display_name":"Cracking SQL Barriers: An LLM-based Dialect Translation System","publication_year":2025,"publication_date":"2025-06-17","ids":{"openalex":"https://openalex.org/W4411403466","doi":"https://doi.org/10.1145/3725278"},"language":"en","primary_location":{"id":"doi:10.1145/3725278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725278","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3725278","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102722470","display_name":"Wei Zhou","orcid":"https://orcid.org/0009-0000-8862-7753"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Zhou","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107921195","display_name":"Y. Gao","orcid":"https://orcid.org/0000-0002-5047-4162"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuyang Gao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056912386","display_name":"Xuanhe Zhou","orcid":"https://orcid.org/0000-0002-2285-7836"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanhe Zhou","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102722470"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.5903,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84881102,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"3","issue":"3","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8655492067337036},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.7191563844680786},{"id":"https://openalex.org/keywords/abstract-syntax-tree","display_name":"Abstract syntax tree","score":0.633324146270752},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5347492098808289},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5059673190116882},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.505784273147583},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.47420385479927063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46273764967918396},{"id":"https://openalex.org/keywords/abstract-syntax","display_name":"Abstract syntax","score":0.4229486882686615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8655492067337036},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.7191563844680786},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.633324146270752},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5347492098808289},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5059673190116882},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.505784273147583},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.47420385479927063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46273764967918396},{"id":"https://openalex.org/C114408938","wikidata":"https://www.wikidata.org/wiki/Q333373","display_name":"Abstract syntax","level":3,"score":0.4229486882686615},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3725278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725278","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3725278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725278","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G5725260554","display_name":null,"funder_award_id":"62232009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G990739032","display_name":null,"funder_award_id":"2023YFB4503600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1988504473","https://openalex.org/W2122406210","https://openalex.org/W2164411868","https://openalex.org/W2187089797","https://openalex.org/W2240667924","https://openalex.org/W2546464424","https://openalex.org/W3046744391","https://openalex.org/W3191453585","https://openalex.org/W4205381461","https://openalex.org/W4223948957","https://openalex.org/W4230983477","https://openalex.org/W4291713239","https://openalex.org/W4320523283","https://openalex.org/W4385270607","https://openalex.org/W4385681533","https://openalex.org/W4392867188","https://openalex.org/W4394769102","https://openalex.org/W4396988324","https://openalex.org/W4399208444","https://openalex.org/W4401352024","https://openalex.org/W4401352664","https://openalex.org/W4402042542","https://openalex.org/W4404181272","https://openalex.org/W6602503091","https://openalex.org/W6973745107"],"related_works":["https://openalex.org/W2077104824","https://openalex.org/W2536864162","https://openalex.org/W2613250302","https://openalex.org/W2390421503","https://openalex.org/W2095381028","https://openalex.org/W2387926336","https://openalex.org/W4294658953","https://openalex.org/W319507398","https://openalex.org/W2143166528","https://openalex.org/W2364428493"],"abstract_inverted_index":{"Automatic":[0],"dialect":[1,69,121,150],"translation":[2,21,70,122,151,218,232],"reduces":[3],"the":[4,58,65,99,162,170,194],"complexity":[5],"of":[6,67,101],"database":[7,17,54,105],"migration,":[8],"which":[9,215],"is":[10,248],"crucial":[11],"for":[12,116],"applications":[13],"interacting":[14],"with":[15,52,71],"multiple":[16,92],"systems.":[18],"However,":[19],"rule-based":[20],"tools":[22],"(e.g.,":[23,87,241],"SQLGlot,":[24],"jOOQ,":[25],"SQLines)":[26],"are":[27,77],"labor-intensive":[28],"to":[29,35,44,136,190,244],"develop":[30],"and":[31,47,91,111,130,168,177,197,219],"often":[32,83,123],"(1)":[33],"fail":[34],"translate":[36,137],"certain":[37,200],"operations,":[38,132],"(2)":[39],"produce":[40],"incorrect":[41],"translations":[42,50],"due":[43],"rule":[45],"deficiencies,":[46],"(3)":[48],"generate":[49,191],"compatible":[51],"some":[53],"versions":[55],"but":[56],"not":[57],"others.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63,146,155,183,208],"investigate":[64],"problem":[66],"automating":[68],"large":[72],"language":[73],"models":[74],"(LLMs).":[75],"There":[76],"three":[78],"main":[79],"challenges.":[80],"First,":[81,154],"queries":[82],"involve":[84],"lengthy":[85],"content":[86],"excessive":[88],"column":[89],"values)":[90],"syntax":[93,109,118,166,195,205],"elements":[94],"that":[95,160,223],"require":[96],"translation,":[97],"increasing":[98],"risk":[100],"LLM":[102],"hallucination.":[103],"Second,":[104,182],"dialects":[106],"have":[107],"diverse":[108],"trees":[110,167,196],"specifications,":[112],"making":[113,133],"it":[114,134],"difficult":[115],"cross-dialect":[117],"matching.":[119,206],"Third,":[120,207],"involves":[124],"complex":[125],"many-to-one":[126],"relationships":[127],"between":[128],"source":[129],"target":[131],"impractical":[135],"each":[138],"operation":[139],"in":[140],"isolation.":[141],"To":[142],"address":[143],"these":[144,229],"challenges,":[145],"propose":[147,156,209],"an":[148],"automatic":[149],"system":[152],"CrackSQL.":[153],"Functionality-based":[157],"Query":[158],"Processing":[159],"segments":[161],"query":[163,171,180,204],"by":[164,193,242],"functionality":[165],"simplifies":[169],"via":[172],"(i)":[173],"customized":[174],"function":[175],"normalization":[176],"(ii)":[178],"translation-irrelevant":[179],"abstraction.":[181],"design":[184],"a":[185,210],"Cross-Dialect":[186],"Syntax":[187],"Embedding":[188],"Model":[189],"embeddings":[192],"specifications":[198],"(of":[199],"version),":[201],"enabling":[202],"accurate":[203],"Local-to-Global":[211],"Dialect":[212],"Translation":[213],"strategy,":[214],"restricts":[216],"LLM-based":[217],"validation":[220],"on":[221],"operations":[222,230],"cause":[224],"local":[225],"failures,":[226],"iteratively":[227],"extending":[228],"until":[231],"succeeds.":[233],"Experiments":[234],"show":[235],"CrackSQL":[236],"significantly":[237],"outperforms":[238],"existing":[239],"methods":[240],"up":[243],"77.42%).":[245],"The":[246],"code":[247],"available":[249],"at":[250],"https://github.com/weAIDB/CrackSQL.":[251]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-14T06:41:57.775601","created_date":"2025-10-10T00:00:00"}
