{"id":"https://openalex.org/W4412887758","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1027","title":"TABGEN-ICL: Residual-Aware In-Context Example Selection for Tabular Data Generation","display_name":"TABGEN-ICL: Residual-Aware In-Context Example Selection for Tabular Data Generation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412887758","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1027"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.1027","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1027","pdf_url":"https://aclanthology.org/2025.findings-acl.1027.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.1027.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076182187","display_name":"Liancheng Fang","orcid":"https://orcid.org/0000-0002-3859-5770"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liancheng Fang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102845658","display_name":"Aiwei Liu","orcid":"https://orcid.org/0000-0002-4965-8263"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aiwei Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101714293","display_name":"Hengrui Zhang","orcid":"https://orcid.org/0000-0001-5090-3177"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hengrui Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109737659","display_name":"Henry Peng Zou","orcid":"https://orcid.org/0009-0003-5259-4998"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henry Peng Zou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043782446","display_name":"Weizhi Zhang","orcid":"https://orcid.org/0000-0002-9131-3234"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weizhi Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5036357902","display_name":"Philip S. Yu","orcid":"https://orcid.org/0000-0002-3491-5968"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Philip S. Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77171262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"20027","last_page":"20041"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9670000076293945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7656570076942444},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6962845325469971},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6721373200416565},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6410996317863464},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21073412895202637},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12520664930343628},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06457823514938354},{"id":"https://openalex.org/keywords/archaeology","display_name":"Archaeology","score":0.047961026430130005}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7656570076942444},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6962845325469971},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6721373200416565},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6410996317863464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21073412895202637},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12520664930343628},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06457823514938354},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.047961026430130005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.1027","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1027","pdf_url":"https://aclanthology.org/2025.findings-acl.1027.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.1027","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1027","pdf_url":"https://aclanthology.org/2025.findings-acl.1027.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Climate action","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/13"}],"awards":[{"id":"https://openalex.org/G1860245536","display_name":"NSF POSE: Phase II: OpenAD: An Integrated Open-Source Ecosystem for Anomaly Detection","funder_award_id":"2346158","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4320322031","display_name":"III: Medium: Collaborative Research: Self-Supervised Recommender System Learning with Application Specific Adaption","funder_award_id":"2106758","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5722720762","display_name":null,"funder_award_id":"III-2106758","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7777878372","display_name":null,"funder_award_id":"POSE-2346158","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412887758.pdf","grobid_xml":"https://content.openalex.org/works/W4412887758.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2560215812","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2949601986"],"abstract_inverted_index":{"Large":[0],"Language":[1],"models":[2],"(LLMs)":[3],"have":[4],"achieved":[5],"encouraging":[6],"results":[7],"in":[8,43,104,167],"tabular":[9,66,122,162],"data":[10,67,87],"generation.However,":[11],"existing":[12],"approaches":[13],"require":[14],"finetuning,":[15],"which":[16],"is":[17,165],"computationally":[18],"expensive.This":[19],"paper":[20],"explores":[21],"an":[22],"alternative:":[23],"prompting":[24,154],"a":[25,51,72,139,155],"fixed":[26,156],"LLM":[27,103,157],"with":[28],"in-context":[29,36,53,60,98],"examples.We":[30],"observe":[31],"that":[32,77,125,153],"using":[33],"randomly":[34],"selected":[35],"examples":[37,100],"hampers":[38],"the":[39,59,79,102,111,129,135,150,168],"LLM's":[40],"performance,":[41],"resulting":[42],"sub-optimal":[44],"generation":[45],"quality.To":[46],"address":[47],"this,":[48],"we":[49],"propose":[50],"novel":[52],"learning":[54,61,99],"framework:":[55],"TABGEN-ICL,":[56],"to":[57,143],"enhance":[58],"ability":[62],"of":[63,74,141],"LLMs":[64],"for":[65,101,149],"generation.TABGEN-ICL":[68],"operates":[69],"iteratively,":[70],"retrieving":[71],"subset":[73],"real":[75,116],"samples":[76,84],"represent":[78],"residual":[80],"between":[81,113],"currently":[82],"generated":[83,114],"and":[85,115],"true":[86],"distributions.This":[88],"approach":[89],"serves":[90],"two":[91],"purposes:":[92],"locally,":[93],"it":[94,108,133],"provides":[95],"more":[96],"effective":[97],"each":[105],"iteration;":[106],"globally,":[107],"progressively":[109],"narrows":[110],"gap":[112],"data.Extensive":[117],"experiments":[118],"on":[119,145],"five":[120],"real-world":[121],"datasets":[123],"demonstrate":[124,148],"TABGEN-ICL":[126],"significantly":[127],"outperforms":[128],"random":[130],"selection":[131],"strategy.Specifically,":[132],"reduces":[134],"error":[136],"rate":[137],"by":[138],"margin":[140],"up":[142],"42.2%":[144],"fidelity":[146],"metrics.We":[147],"first":[151],"time":[152],"can":[158],"yield":[159],"high-quality":[160],"synthetic":[161],"data.The":[163],"code":[164],"provided":[166],"link.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
