{"id":"https://openalex.org/W7147725594","doi":"https://doi.org/10.1109/cnml68938.2026.11452539","title":"LLM-TDA: A LLM-based Tabular Data Augmentation Framework","display_name":"LLM-TDA: A LLM-based Tabular Data Augmentation Framework","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7147725594","doi":"https://doi.org/10.1109/cnml68938.2026.11452539"},"language":null,"primary_location":{"id":"doi:10.1109/cnml68938.2026.11452539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11452539","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066868064","display_name":"Rui Lin","orcid":"https://orcid.org/0000-0001-9560-7187"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Lin","raw_affiliation_strings":["Beijing Institute of Technology,School of Computer Science,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Computer Science,Beijing,China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132691200","display_name":"Zhiwei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Zhang","raw_affiliation_strings":["Beijing Institute of Technology,School of Computer Science,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Computer Science,Beijing,China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066868064"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.90523354,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1092","last_page":"1096"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.22669999301433563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.22669999301433563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.19480000436306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.18960000574588776,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.6248000264167786},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6132000088691711},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5843999981880188},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5595999956130981},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.47519999742507935},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.45089998841285706},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.41019999980926514},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4009000062942505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8593999743461609},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.6248000264167786},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6132000088691711},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5843999981880188},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5595999956130981},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5378000140190125},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4844000041484833},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.47519999742507935},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.45089998841285706},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4323999881744385},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.41019999980926514},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3075000047683716},{"id":"https://openalex.org/C101814296","wikidata":"https://www.wikidata.org/wiki/Q5439685","display_name":"Feature model","level":3,"score":0.30169999599456787},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cnml68938.2026.11452539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11452539","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1619226191","https://openalex.org/W2043772506","https://openalex.org/W2152195021","https://openalex.org/W2911964244","https://openalex.org/W3037852608","https://openalex.org/W3196904276","https://openalex.org/W4289236186","https://openalex.org/W4375928372","https://openalex.org/W4385270264","https://openalex.org/W4406939986","https://openalex.org/W4416037109"],"related_works":[],"abstract_inverted_index":{"Tabular":[0],"data":[1],"augmentation":[2,97],"is":[3],"a":[4,79,101,105,118,156,169],"critical":[5],"technique":[6],"for":[7,174],"improving":[8],"machine":[9],"learning":[10],"performance":[11],"in":[12,21,68,145,161],"data-limited":[13],"scenarios.":[14],"However,":[15],"existing":[16],"methods":[17,44],"face":[18],"inherent":[19],"limitations":[20],"balancing":[22],"quality":[23],"and":[24,171],"efficiency.":[25],"Filter-based":[26],"approaches":[27],"tend":[28],"to":[29,36,93,129,158,165],"introduce":[30],"low-quality":[31],"or":[32],"irrelevant":[33,111],"features":[34],"due":[35],"the":[37,64,84,95],"lack":[38],"of":[39,88],"task-specific":[40],"feedback,":[41],"while":[42,154],"wrapper-based":[43,166],"incur":[45],"prohibitive":[46],"time":[47],"overheads":[48],"necessitated":[49],"by":[50],"massive":[51],"downstream":[52,146],"model":[53],"training.":[54],"Fundamentally,":[55],"both":[56],"paradigms":[57],"rely":[58],"solely":[59],"on":[60,114,135],"statistical":[61,127],"properties,":[62],"neglecting":[63],"rich":[65],"semantic":[66,85,124],"information":[67],"tabular":[69],"data.":[70],"To":[71],"address":[72],"these":[73],"limitations,":[74],"this":[75,152],"paper":[76],"proposes":[77],"LLM-TDA,":[78],"novel":[80],"framework":[81],"that":[82,139],"leverages":[83],"reasoning":[86],"capabilities":[87],"Large":[89],"Language":[90],"Models":[91],"(LLMs)":[92],"optimize":[94],"feature":[96,120,176],"process.":[98],"LLM-TDA":[99,140],"employs":[100],"coarse-to-fine":[102],"pipeline:":[103],"first,":[104],"table-level":[106],"analysis":[107],"rapidly":[108],"prunes":[109],"semantically":[110],"tables":[112],"based":[113],"business":[115],"logic;":[116],"second,":[117],"fine-grained":[119],"selection":[121],"mechanism":[122],"fuses":[123],"understanding":[125],"with":[126],"assessment":[128],"identify":[130],"high-value":[131],"features.":[132],"Extensive":[133],"experiments":[134],"real-world":[136],"datasets":[137],"demonstrate":[138],"significantly":[141],"outperforms":[142],"state-of-the-art":[143],"baselines":[144],"task":[147],"performance.":[148],"Notably,":[149],"it":[150],"achieves":[151],"accuracy":[153],"delivering":[155],"10-fold":[157],"14-fold":[159],"improvement":[160],"computational":[162],"efficiency":[163],"compared":[164],"methods,":[167],"providing":[168],"robust":[170],"scalable":[172],"solution":[173],"automated":[175],"engineering.":[177]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-04-02T00:00:00"}
