{"id":"https://openalex.org/W7138109339","doi":"https://doi.org/10.1609/aaai.v40i38.40508","title":"DEALT: LLM-driven Diversity-Enhanced Data Augmentation for Long-Tail Text Classification","display_name":"DEALT: LLM-driven Diversity-Enhanced Data Augmentation for Long-Tail Text Classification","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138109339","doi":"https://doi.org/10.1609/aaai.v40i38.40508"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i38.40508","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40508","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40508/44469","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40508/44469","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129665656","display_name":"Wayne Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wayne Lu","raw_affiliation_strings":["Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Independent Researcher","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129704449","display_name":"Xiaoxi Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I92472073","display_name":"Takenaka (Japan)","ror":"https://ror.org/02zs45744","country_code":"JP","type":"company","lineage":["https://openalex.org/I92472073"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xiaoxi Cui","raw_affiliation_strings":["Takway.AI"],"affiliations":[{"raw_affiliation_string":"Takway.AI","institution_ids":["https://openalex.org/I92472073"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5129665656"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32869379,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"38","first_page":"32338","last_page":"32346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.19300000369548798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.19300000369548798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.17630000412464142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.17229999601840973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5031999945640564},{"id":"https://openalex.org/keywords/validator","display_name":"Validator","score":0.4366999864578247},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3862000107765198},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.3675999939441681},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.33889999985694885},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.33390000462532043},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.3328999876976013},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.3287999927997589}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7688000202178955},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6166999936103821},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5200999975204468},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5031999945640564},{"id":"https://openalex.org/C35292069","wikidata":"https://www.wikidata.org/wiki/Q1575458","display_name":"Validator","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3862000107765198},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3675999939441681},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.367000013589859},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3474000096321106},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i38.40508","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40508","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40508/44469","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i38.40508","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40508","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40508/44469","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.40770742297172546,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138109339.pdf","grobid_xml":"https://content.openalex.org/works/W7138109339.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-world":[0],"text":[1,143],"classification":[2,144],"datasets":[3,145],"frequently":[4],"exhibit":[5],"long-tail":[6],"distributions,":[7],"where":[8],"numerous":[9],"classes":[10],"have":[11],"sparse":[12],"data,":[13,118],"significantly":[14],"degrading":[15],"model":[16,131,158],"performance":[17,155],"on":[18,129,140],"these":[19,52],"underrepresented":[20],"categories.":[21],"While":[22],"Large":[23],"Language":[24],"Models":[25],"(LLMs)":[26],"offer":[27],"promise":[28],"for":[29,61,99],"data":[30,84],"augmentation,":[31],"existing":[32],"methods":[33,151],"often":[34],"produce":[35],"semantically":[36],"limited":[37],"samples,":[38],"neglect":[39],"\"implicit":[40],"long-tails\"":[41],"(sparse":[42],"sub-patterns":[43],"within":[44],"classes),":[45],"and":[46,78,91,112,119,135,156,164],"lack":[47],"cost-effective":[48],"optimization.":[49,137],"To":[50],"address":[51],"challenges,":[53],"we":[54],"propose":[55],"\\textbf{DEALT":[56],"(LLM-driven":[57],"Diversity-Enhanced":[58],"Data":[59],"Augmentation":[60],"Long-Tail":[62],"Text":[63],"Classification)},":[64],"a":[65],"novel":[66],"cognitive-inspired":[67],"framework":[68],"emulating":[69],"the":[70,116],"human":[71],"learning":[72],"process":[73],"of":[74,102],"\"recognize,":[75],"explore,":[76],"generate,":[77],"optimize.\"":[79],"DEALT":[80],"systematically":[81],"enhances":[82],"augmented":[83,166],"diversity":[85,113],"by":[86,106,160],"first":[87],"detecting":[88],"both":[89],"explicit":[90],"implicit":[92],"long-tails.":[93],"It":[94],"then":[95],"employs":[96],"an":[97,120],"LLM":[98],"diversity-aware":[100],"planning":[101],"augmentation":[103,126],"strategies,":[104],"followed":[105],"conditional":[107],"generation.":[108],"A":[109],"low-overhead":[110],"quality":[111],"validator":[114],"filters":[115],"synthetic":[117],"adaptive":[121],"incremental":[122],"sampler":[123],"refines":[124],"future":[125],"efforts":[127],"based":[128],"proxy":[130],"feedback,":[132],"ensuring":[133],"efficient":[134],"budget-aware":[136],"Extensive":[138],"experiments":[139],"multiple":[141],"public":[142],"demonstrate":[146],"DEALT's":[147],"superiority":[148],"over":[149],"state-of-the-art":[150],"in":[152],"improving":[153],"tail-class":[154],"overall":[157],"robustness":[159],"generating":[161],"more":[162],"diverse":[163],"high-fidelity":[165],"data.":[167]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
