{"id":"https://openalex.org/W4411403276","doi":"https://doi.org/10.1145/3725262","title":"Adda: Towards Efficient in-Database Feature Generation via LLM-based Agents","display_name":"Adda: Towards Efficient in-Database Feature Generation via LLM-based Agents","publication_year":2025,"publication_date":"2025-06-17","ids":{"openalex":"https://openalex.org/W4411403276","doi":"https://doi.org/10.1145/3725262"},"language":"en","primary_location":{"id":"doi:10.1145/3725262","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725262","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kuan Lu","orcid":"https://orcid.org/0009-0007-9344-619X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kuan Lu","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":"https://orcid.org/0009-0007-9344-619X","affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102015659","display_name":"Zhihui Yang","orcid":"https://orcid.org/0000-0002-0344-1464"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihui Yang","raw_affiliation_strings":["Zhejiang University, The State Key Laboratory of Blockchain and Data Security, HangZhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0344-1464","affiliations":[{"raw_affiliation_string":"Zhejiang University, The State Key Laboratory of Blockchain and Data Security, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017672363","display_name":"Sai Wu","orcid":"https://orcid.org/0000-0002-7903-1496"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sai Wu","raw_affiliation_strings":["Zhejiang Key Laboratory of Big Data Intelligent Computing, Zhejiang University, HangZhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7903-1496","affiliations":[{"raw_affiliation_string":"Zhejiang Key Laboratory of Big Data Intelligent Computing, Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I4210123185","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114084270","display_name":"Ruichen Xia","orcid":"https://orcid.org/0009-0002-2736-451X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruichen Xia","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":"https://orcid.org/0009-0002-2736-451X","affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032211015","display_name":"Dongxiang Zhang","orcid":"https://orcid.org/0009-0006-6338-0698"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxiang Zhang","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":"https://orcid.org/0009-0006-6338-0698","affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389286","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7483-0045","affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":5.6185,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95569911,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"3","issue":"3","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8557665348052979},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.7603875398635864},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.555036723613739},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5263841152191162},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.4727633595466614},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4121481776237488},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3411613702774048},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12930846214294434}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8557665348052979},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.7603875398635864},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.555036723613739},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5263841152191162},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.4727633595466614},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4121481776237488},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3411613702774048},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12930846214294434},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3725262","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725262","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1976373002","https://openalex.org/W1988714735","https://openalex.org/W2108114251","https://openalex.org/W2182353144","https://openalex.org/W2584335703","https://openalex.org/W2740333758","https://openalex.org/W2784041417","https://openalex.org/W2798903611","https://openalex.org/W2896457183","https://openalex.org/W2962991166","https://openalex.org/W2977715335","https://openalex.org/W3013995528","https://openalex.org/W3029900615","https://openalex.org/W3030051638","https://openalex.org/W3093217081","https://openalex.org/W3123375411","https://openalex.org/W3165814564","https://openalex.org/W4206285062","https://openalex.org/W4237407586","https://openalex.org/W4281397938","https://openalex.org/W4281626245","https://openalex.org/W4289866515","https://openalex.org/W4294904053","https://openalex.org/W4301165286","https://openalex.org/W4385569780","https://openalex.org/W4400531953"],"related_works":["https://openalex.org/W2003932708","https://openalex.org/W1967424056","https://openalex.org/W3147584709","https://openalex.org/W2284877871","https://openalex.org/W2138102289","https://openalex.org/W2977677679","https://openalex.org/W868043274","https://openalex.org/W2513523087","https://openalex.org/W1992327129","https://openalex.org/W4320802139"],"abstract_inverted_index":{"Integrating":[0],"machine":[1],"learning":[2],"(ML)":[3],"analytics":[4,88,96,158],"into":[5,126,139],"existing":[6],"database":[7],"management":[8],"systems":[9],"(DBMSs)":[10],"not":[11],"only":[12],"eliminates":[13],"the":[14,91,110,176],"need":[15],"for":[16,37,43,55,63,86,105,151,156],"costly":[17],"data":[18],"transfers":[19],"to":[20,52,81,180,188,191],"external":[21],"ML":[22,41,65,87,95,157,168],"platforms":[23],"but":[24],"also":[25],"ensures":[26],"compliance":[27],"with":[28,118,166],"regulatory":[29],"standards.":[30],"While":[31],"some":[32],"DBMSs":[33],"have":[34],"integrated":[35,117],"functionalities":[36],"training":[38],"and":[39,102,135,182],"applying":[40],"models":[42,115],"analytics,":[44],"these":[45],"tasks":[46,97,169],"still":[47],"present":[48],"challenges,":[49],"particularly":[50],"due":[51],"limited":[53],"support":[54],"automatic":[56],"feature":[57,77,106,152],"engineering":[58],"(AutoFE),":[59],"which":[60],"is":[61,123,145],"crucial":[62],"optimizing":[64],"model":[66],"performance.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71],"introduce":[72],"Adda,":[73],"an":[74],"agent-driven":[75],"in-database":[76,149],"generation":[78],"tool":[79],"designed":[80],"automatically":[82],"create":[83],"high-quality":[84],"features":[85],"directly":[89],"within":[90],"database.":[92],"Adda":[93,174],"interprets":[94],"described":[98],"in":[99],"natural":[100],"language":[101,114],"generates":[103],"code":[104,122],"construction":[107],"by":[108,178,186],"leveraging":[109],"power":[111],"of":[112,133],"large":[113],"(LLMs)":[116],"specialized":[119],"agents.":[120],"This":[121],"then":[124],"translated":[125],"SQL":[127],"statements":[128],"using":[129],"a":[130,146],"predefined":[131],"set":[132],"operators":[134],"compiled":[136],"just-in-time":[137],"(JIT)":[138],"user-defined":[140],"functions":[141],"(UDFs).":[142],"The":[143],"result":[144],"seamless,":[147],"fully":[148],"solution":[150],"generation,":[153],"specifically":[154],"tailored":[155],"tasks.":[159],"Extensive":[160],"experiments":[161],"across":[162],"14":[163],"public":[164],"datasets,":[165],"five":[167],"per":[170],"dataset,":[171],"show":[172],"that":[173],"improves":[175],"AUC":[177],"up":[179,187],"33.2%":[181],"reduces":[183],"end-to-end":[184],"latency":[185],"100x":[189],"compared":[190],"Madlib.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-05-27T09:02:27.158192","created_date":"2025-10-10T00:00:00"}
