{"id":"https://openalex.org/W4414243402","doi":"https://doi.org/10.14778/3750601.3750647","title":"A Demonstration of QueryArtisan: Real-Time Data Lake Analysis via Dynamically Generated Data Manipulation Code","display_name":"A Demonstration of QueryArtisan: Real-Time Data Lake Analysis via Dynamically Generated Data Manipulation Code","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4414243402","doi":"https://doi.org/10.14778/3750601.3750647"},"language":"en","primary_location":{"id":"doi:10.14778/3750601.3750647","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750647","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021066656","display_name":"Wenhao Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenhao Liu","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112122466","display_name":"Tang Xiu","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Tang","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064646829","display_name":"Sai Wu","orcid":"https://orcid.org/0000-0002-1866-9197"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sai Wu","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056066639","display_name":"Chang Yao","orcid":"https://orcid.org/0000-0002-1187-6257"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Yao","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014098250","display_name":"Gongsheng Yuan","orcid":"https://orcid.org/0000-0002-7992-3179"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gongsheng Yuan","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389286","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5021066656"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30416529,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"12","first_page":"5263","last_page":"5266"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6327000260353088},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6155999898910522},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5695000290870667},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.536899983882904},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5210000276565552},{"id":"https://openalex.org/keywords/data-manipulation-language","display_name":"Data manipulation language","score":0.5031999945640564},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.5026999711990356},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.4659000039100647},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.39730000495910645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85589998960495},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6327000260353088},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6155999898910522},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5695000290870667},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5386000275611877},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.536899983882904},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5210000276565552},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.5031999945640564},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.5026999711990356},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.4659000039100647},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.39730000495910645},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.38769999146461487},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.36649999022483826},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.32690000534057617},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C100463513","wikidata":"https://www.wikidata.org/wiki/Q5227322","display_name":"Data model (GIS)","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31369999051094055},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C2780977526","wikidata":"https://www.wikidata.org/wiki/Q42417149","display_name":"Data exploration","level":3,"score":0.2554999887943268},{"id":"https://openalex.org/C175801342","wikidata":"https://www.wikidata.org/wiki/Q1988917","display_name":"Data analysis","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3750601.3750647","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3750601.3750647","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2112129552","https://openalex.org/W2970992672","https://openalex.org/W3031580546","https://openalex.org/W4384659457","https://openalex.org/W4408061150"],"related_works":[],"abstract_inverted_index":{"Querying":[0],"and":[1,16,88,92],"analyzing":[2],"data":[3,5,13,49,64,90,125],"in":[4,73,153],"lakes":[6],"requires":[7],"substantial":[8],"manual":[9],"intervention,":[10],"including":[11],"numerous":[12],"preprocessing":[14],"steps,":[15],"often":[17],"demands":[18],"complex":[19,71],"domain":[20],"expertise.":[21,81],"However,":[22],"the":[23,45,77,96,140,143,165],"advent":[24],"of":[25,118,142,164],"Large":[26],"Language":[27],"Models":[28],"(LLMs)":[29],"has":[30],"introduced":[31],"a":[32,40,57,131],"promising":[33],"solution":[34],"to":[35,69,123,138,155,159],"these":[36],"challenges":[37],"by":[38,108],"providing":[39],"unified":[41],"framework":[42],"for":[43,63,79,113],"interpreting":[44],"heterogeneous":[46,119],"datasets":[47],"within":[48],"lakes.":[50,65],"In":[51,129],"this":[52],"paper,":[53],"we":[54],"demonstrate":[55],"QueryArtisan,":[56],"novel":[58],"LLM-powered":[59],"analytical":[60,157],"system":[61,83],"tailored":[62],"It":[66],"enables":[67],"users":[68],"issue":[70],"queries":[72,87],"natural":[74],"language":[75],"without":[76],"need":[78],"domain-specific":[80],"The":[82],"automatically":[84],"executes":[85],"user-submitted":[86],"performs":[89],"processing":[91],"analysis":[93,163],"based":[94],"on":[95],"query":[97,133],"results.":[98],"QueryArtisan":[99,147],"extends":[100],"beyond":[101],"traditional":[102],"ETL":[103],"(Extract,":[104],"Transform,":[105],"Load)":[106],"processes":[107],"generating":[109],"just-in-time":[110],"code":[111],"customized":[112],"dataset-specific":[114],"tasks.":[115],"A":[116],"suite":[117],"operators":[120],"is":[121,136],"developed":[122],"process":[124],"across":[126],"various":[127],"modalities.":[128],"addition,":[130],"cost-based":[132],"optimization":[134],"mechanism":[135],"integrated":[137],"improve":[139],"efficiency":[141],"generated":[144],"code.":[145],"Furthermore,":[146],"can":[148],"dynamically":[149],"instantiate":[150],"multiple":[151],"agents":[152],"response":[154],"user-defined":[156],"requirements":[158],"perform":[160],"further":[161],"in-depth":[162],"retrieved":[166],"data.":[167]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
