{"id":"https://openalex.org/W4414244629","doi":"https://doi.org/10.14778/3750601.3750629","title":"Towards Automated Cross-Domain Exploratory Data Analysis through Large Language Models","display_name":"Towards Automated Cross-Domain Exploratory Data Analysis through Large Language Models","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4414244629","doi":"https://doi.org/10.14778/3750601.3750629"},"language":"en","primary_location":{"id":"doi:10.14778/3750601.3750629","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750629","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114195823","display_name":"Jun-Peng Zhu","orcid":"https://orcid.org/0009-0006-9053-0129"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jun-Peng Zhu","raw_affiliation_strings":["East China Normal University &amp; PingCAP"],"affiliations":[{"raw_affiliation_string":"East China Normal University &amp; PingCAP","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105309751","display_name":"Boyan Niu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boyan Niu","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100618606","display_name":"Peng Cai","orcid":"https://orcid.org/0000-0002-5457-3811"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Cai","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113300589","display_name":"Zheming Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheming Ni","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108443884","display_name":"Jianwei Wan","orcid":"https://orcid.org/0009-0005-5440-4179"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianwei Wan","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101925526","display_name":"Kai Xu","orcid":"https://orcid.org/0000-0002-4334-2444"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kai Xu","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101440049","display_name":"Jiajun Huang","orcid":"https://orcid.org/0009-0007-2346-953X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiajun Huang","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111513994","display_name":"Shengbo Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shengbo Ma","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382568","display_name":"Bing Wang","orcid":"https://orcid.org/0000-0003-4945-7725"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bing Wang","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075253960","display_name":"Xuan Zhou","orcid":"https://orcid.org/0000-0002-8058-7627"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Zhou","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111390662","display_name":"Guangyu Bao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guanglei Bao","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101882846","display_name":"Donghui Zhang","orcid":"https://orcid.org/0000-0002-9503-5809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Donghui Zhang","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100764958","display_name":"Liu Tang","orcid":"https://orcid.org/0000-0002-0813-8229"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu Tang","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100453156","display_name":"Qi Liu","orcid":"https://orcid.org/0000-0001-6956-5550"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi Liu","raw_affiliation_strings":["PingCAP, China"],"affiliations":[{"raw_affiliation_string":"PingCAP, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5114195823"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":2.856,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92186513,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"18","issue":"12","first_page":"5086","last_page":"5099"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5515000224113464},{"id":"https://openalex.org/keywords/exploratory-data-analysis","display_name":"Exploratory data analysis","score":0.5128999948501587},{"id":"https://openalex.org/keywords/data-visualization","display_name":"Data visualization","score":0.489300012588501},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.48159998655319214},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.45190000534057617},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.44510000944137573},{"id":"https://openalex.org/keywords/visual-language","display_name":"Visual language","score":0.4341999888420105},{"id":"https://openalex.org/keywords/data-exploration","display_name":"Data exploration","score":0.37130001187324524},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.32749998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84579998254776},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5515000224113464},{"id":"https://openalex.org/C120894424","wikidata":"https://www.wikidata.org/wiki/Q1322871","display_name":"Exploratory data analysis","level":2,"score":0.5128999948501587},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.489300012588501},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48159998655319214},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.45190000534057617},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.4341999888420105},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.382099986076355},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3723999857902527},{"id":"https://openalex.org/C2780977526","wikidata":"https://www.wikidata.org/wiki/Q42417149","display_name":"Data exploration","level":3,"score":0.37130001187324524},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.32749998569488525},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.3253999948501587},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.296099990606308},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2906000018119812},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2806999981403351},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C100463513","wikidata":"https://www.wikidata.org/wiki/Q5227322","display_name":"Data model (GIS)","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26820001006126404},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3750601.3750629","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750629","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2083619093","https://openalex.org/W2160382748","https://openalex.org/W2536574992","https://openalex.org/W2592469568","https://openalex.org/W2610226709","https://openalex.org/W2750779823","https://openalex.org/W2790634852","https://openalex.org/W2792990871","https://openalex.org/W2795226127","https://openalex.org/W2804295601","https://openalex.org/W2889073286","https://openalex.org/W2890431379","https://openalex.org/W2952032096","https://openalex.org/W2966332071","https://openalex.org/W2991028016","https://openalex.org/W3032467329","https://openalex.org/W3046744391","https://openalex.org/W3086973390","https://openalex.org/W3103801878","https://openalex.org/W3183318857","https://openalex.org/W3198767185","https://openalex.org/W4205807230","https://openalex.org/W4285338239","https://openalex.org/W4285451014","https://openalex.org/W4286447321","https://openalex.org/W4287816933","https://openalex.org/W4317641445","https://openalex.org/W4382202531","https://openalex.org/W4382202695","https://openalex.org/W4389523621","https://openalex.org/W4399175046","https://openalex.org/W4400909559","https://openalex.org/W4402043038","https://openalex.org/W4404181176","https://openalex.org/W4404181300","https://openalex.org/W4410538768"],"related_works":[],"abstract_inverted_index":{"Exploratory":[0],"data":[1,10,14,19,81,115,123,149,173],"analysis":[2,116],"(EDA),":[3],"coupled":[4],"with":[5,184],"SQL,":[6],"is":[7,140,155],"essential":[8],"for":[9,142],"analysts":[11,20],"involved":[12],"in":[13,188],"exploration":[15,82],"and":[16,34,100,166,172],"analysis.":[17],"However,":[18,74],"often":[21],"encounter":[22],"two":[23],"primary":[24],"challenges:":[25],"(1)":[26,87],"the":[27,36,45,132,136,152,189,206,237,252],"need":[28],"to":[29,38,51,60,64,78,86,130,135,146,202,225],"craft":[30],"SQL":[31],"queries":[32],"skillfully":[33],"(2)":[35,91],"requirement":[37],"generate":[39],"suitable":[40],"visualization":[41,174],"types":[42],"that":[43,219],"enhance":[44],"interpretation":[46],"of":[47,200,234,249],"query":[48],"results.":[49],"Due":[50],"its":[52],"significance,":[53],"substantial":[54],"research":[55,204],"efforts":[56],"have":[57,195],"been":[58],"made":[59],"explore":[61],"different":[62],"approaches":[63],"address":[65],"these":[66],"challenges,":[67],"including":[68],"leveraging":[69],"large":[70],"language":[71],"models":[72],"(LLMs).":[73],"existing":[75],"methods":[76],"fail":[77],"meet":[79],"real-world":[80,214],"requirements":[83],"primarily":[84],"due":[85],"complex":[88],"database":[89,137],"schema,":[90,138],"unclear":[92],"user":[93,215],"intent,":[94],"(3)":[95],"limited":[96],"cross-domain":[97,113],"generalization":[98],"capability,":[99],"(4)":[101],"insufficient":[102],"end-to-end":[103,181],"text-to-visualization":[104],"capability.":[105],"This":[106],"paper":[107],"presents":[108],"TiInsight,":[109],"an":[110,180,231,246],"automated":[111],"SQL-based":[112],"exploratory":[114],"system.":[117],"First,":[118],"we":[119,178,217],"propose":[120],"a":[121,185,213],"hierarchical":[122],"context":[124],"(i.e.,":[125,160,170,175],"HDC),":[126],"which":[127,139],"leverages":[128],"LLMs":[129],"summarize":[131],"contexts":[133],"related":[134],"crucial":[141],"open-world":[143],"EDA":[144,153,182,207],"systems":[145],"generalize":[147],"across":[148],"domains.":[150],"Second,":[151],"system":[154,183],"divided":[156],"into":[157],"four":[158],"components":[159],"stages):":[161],"HDC":[162],"generation,":[163],"question":[164],"clarification":[165],"decomposition,":[167],"text-to-SQL":[168],"generation":[169],"TiSQL),":[171],"TiChart).":[176],"Finally,":[177],"implemented":[179],"user-friendly":[186],"GUI":[187],"production":[190],"environment":[191],"at":[192],"PingCAP.":[193],"We":[194],"also":[196,244],"open-sourced":[197],"all":[198],"APIs":[199],"TiInsight":[201,220],"facilitate":[203],"within":[205],"community.":[208],"Through":[209],"extensive":[210],"evaluations":[211],"by":[212],"study,":[216],"demonstrate":[218],"offers":[221],"remarkable":[222],"performance":[223],"compared":[224],"human":[226],"experts.":[227],"Additionally,":[228],"TiSQL":[229],"achieves":[230],"execution":[232,247],"accuracy":[233,248],"86.3%":[235],"on":[236,251],"Spider":[238],"dataset":[239],"when":[240],"using":[241],"GPT-4.":[242],"It":[243],"attains":[245],"60.98%":[250],"Bird":[253],"test":[254],"dataset.":[255]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
