{"id":"https://openalex.org/W4327656064","doi":"https://doi.org/10.1162/tacl_a_00542","title":"An Empirical Survey of Data Augmentation for Limited Data Learning in NLP","display_name":"An Empirical Survey of Data Augmentation for Limited Data Learning in NLP","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4327656064","doi":"https://doi.org/10.1162/tacl_a_00542"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00542","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00542","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00542/2074871/tacl_a_00542.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00542/2074871/tacl_a_00542.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102986685","display_name":"Jiaao Chen","orcid":"https://orcid.org/0009-0004-8425-2893"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jiaao Chen","raw_affiliation_strings":["Georgia Institute of Technology, USA. jchen896@gatech.edu"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, USA. jchen896@gatech.edu","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112879125","display_name":"Derek Tam","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I1333535994","display_name":"University of North Carolina Health Care","ror":"https://ror.org/00qz24g20","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1333535994"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Derek Tam","raw_affiliation_strings":["UNC Chapel Hill, USA. dtredsox@cs.unc.edu"],"affiliations":[{"raw_affiliation_string":"UNC Chapel Hill, USA. dtredsox@cs.unc.edu","institution_ids":["https://openalex.org/I1333535994","https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045077843","display_name":"Colin Raffel","orcid":null},"institutions":[{"id":"https://openalex.org/I1333535994","display_name":"University of North Carolina Health Care","ror":"https://ror.org/00qz24g20","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1333535994"]},{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Colin Raffel","raw_affiliation_strings":["UNC Chapel Hill, USA. craffel@cs.unc.edu"],"affiliations":[{"raw_affiliation_string":"UNC Chapel Hill, USA. craffel@cs.unc.edu","institution_ids":["https://openalex.org/I1333535994","https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001987532","display_name":"Mohit Bansal","orcid":"https://orcid.org/0000-0001-5522-1351"},"institutions":[{"id":"https://openalex.org/I1333535994","display_name":"University of North Carolina Health Care","ror":"https://ror.org/00qz24g20","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1333535994"]},{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohit Bansal","raw_affiliation_strings":["UNC Chapel Hill, USA. mbansal@cs.unc.edu"],"affiliations":[{"raw_affiliation_string":"UNC Chapel Hill, USA. mbansal@cs.unc.edu","institution_ids":["https://openalex.org/I1333535994","https://openalex.org/I114027177"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089413311","display_name":"Diyi Yang","orcid":"https://orcid.org/0000-0003-1220-3983"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Diyi Yang","raw_affiliation_strings":["Georgia Institute of Technology, USA. dyang888@gatech.edu"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, USA. dyang888@gatech.edu","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5001987532","https://openalex.org/A5045077843","https://openalex.org/A5089413311","https://openalex.org/A5102986685","https://openalex.org/A5112879125"],"corresponding_institution_ids":["https://openalex.org/I114027177","https://openalex.org/I130701444","https://openalex.org/I1333535994"],"apc_list":null,"apc_paid":null,"fwci":21.9747,"has_fulltext":false,"cited_by_count":145,"citation_normalized_percentile":{"value":0.99644472,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":100},"biblio":{"volume":"11","issue":null,"first_page":"191","last_page":"211"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8188512325286865},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7224453687667847},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6534075140953064},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.575178861618042},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5520095825195312},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5148600935935974},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.43547922372817993},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4333633482456207}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8188512325286865},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7224453687667847},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6534075140953064},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.575178861618042},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5520095825195312},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5148600935935974},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.43547922372817993},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4333633482456207},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00542","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00542","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00542/2074871/tacl_a_00542.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b26ac7ab1b3a4593bc9c80298cc4e58e","is_oa":true,"landing_page_url":"https://doaj.org/article/b26ac7ab1b3a4593bc9c80298cc4e58e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 11, Pp 191-211 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00542","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00542","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00542/2074871/tacl_a_00542.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4327656064.pdf"},"referenced_works_count":184,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1607035479","https://openalex.org/W1945616565","https://openalex.org/W1983320747","https://openalex.org/W1984541135","https://openalex.org/W2048679005","https://openalex.org/W2101210369","https://openalex.org/W2156163116","https://openalex.org/W2163605009","https://openalex.org/W2170240176","https://openalex.org/W2250473257","https://openalex.org/W2251658415","https://openalex.org/W2550821151","https://openalex.org/W2559655401","https://openalex.org/W2561274697","https://openalex.org/W2594978815","https://openalex.org/W2610850660","https://openalex.org/W2612690371","https://openalex.org/W2746314669","https://openalex.org/W2755124548","https://openalex.org/W2787711783","https://openalex.org/W2798858969","https://openalex.org/W2799007037","https://openalex.org/W2799194071","https://openalex.org/W2799915114","https://openalex.org/W2811010710","https://openalex.org/W2856961199","https://openalex.org/W2887428522","https://openalex.org/W2889326796","https://openalex.org/W2889577585","https://openalex.org/W2890007195","https://openalex.org/W2890719433","https://openalex.org/W2891602716","https://openalex.org/W2898856000","https://openalex.org/W2905266130","https://openalex.org/W2913961733","https://openalex.org/W2914859268","https://openalex.org/W2915480215","https://openalex.org/W2915722758","https://openalex.org/W2922917409","https://openalex.org/W2923014074","https://openalex.org/W2945232141","https://openalex.org/W2946068894","https://openalex.org/W2947683321","https://openalex.org/W2949128310","https://openalex.org/W2949736877","https://openalex.org/W2951286828","https://openalex.org/W2951328433","https://openalex.org/W2962369866","https://openalex.org/W2962689740","https://openalex.org/W2962739339","https://openalex.org/W2962805889","https://openalex.org/W2962897020","https://openalex.org/W2963018920","https://openalex.org/W2963126845","https://openalex.org/W2963216553","https://openalex.org/W2963223306","https://openalex.org/W2963357083","https://openalex.org/W2963364041","https://openalex.org/W2963459241","https://openalex.org/W2963490498","https://openalex.org/W2963545917","https://openalex.org/W2963655793","https://openalex.org/W2963777632","https://openalex.org/W2963783970","https://openalex.org/W2963823140","https://openalex.org/W2963858765","https://openalex.org/W2963956670","https://openalex.org/W2963969878","https://openalex.org/W2964094426","https://openalex.org/W2964159205","https://openalex.org/W2964170290","https://openalex.org/W2970418174","https://openalex.org/W2970706333","https://openalex.org/W2970796366","https://openalex.org/W2970947563","https://openalex.org/W2971152344","https://openalex.org/W2971252690","https://openalex.org/W2971296908","https://openalex.org/W2972668418","https://openalex.org/W2982225063","https://openalex.org/W2982836234","https://openalex.org/W2983391927","https://openalex.org/W2985884876","https://openalex.org/W2986068180","https://openalex.org/W2998107946","https://openalex.org/W2998184481","https://openalex.org/W2998277219","https://openalex.org/W2998508940","https://openalex.org/W3001197829","https://openalex.org/W3004437239","https://openalex.org/W3015001695","https://openalex.org/W3015944346","https://openalex.org/W3023553115","https://openalex.org/W3027026357","https://openalex.org/W3030469652","https://openalex.org/W3034340683","https://openalex.org/W3034942609","https://openalex.org/W3035204084","https://openalex.org/W3035282664","https://openalex.org/W3035331128","https://openalex.org/W3035352537","https://openalex.org/W3035542229","https://openalex.org/W3035577668","https://openalex.org/W3038105747","https://openalex.org/W3043172396","https://openalex.org/W3087231533","https://openalex.org/W3089659770","https://openalex.org/W3098341425","https://openalex.org/W3098843277","https://openalex.org/W3099126561","https://openalex.org/W3100268441","https://openalex.org/W3100742171","https://openalex.org/W3101980583","https://openalex.org/W3102903864","https://openalex.org/W3103291112","https://openalex.org/W3104423855","https://openalex.org/W3105241646","https://openalex.org/W3105261549","https://openalex.org/W3105604018","https://openalex.org/W3105725479","https://openalex.org/W3106156541","https://openalex.org/W3153427360","https://openalex.org/W3156333129","https://openalex.org/W3168656614","https://openalex.org/W3172642864","https://openalex.org/W3172794097","https://openalex.org/W3173274550","https://openalex.org/W3174828871","https://openalex.org/W3175982906","https://openalex.org/W3176265725","https://openalex.org/W3201090304","https://openalex.org/W3214193653","https://openalex.org/W4212774754","https://openalex.org/W4288089799","https://openalex.org/W4288282820","https://openalex.org/W4295253143","https://openalex.org/W4300996741","https://openalex.org/W4303105513","https://openalex.org/W4310895557","https://openalex.org/W4385573325","https://openalex.org/W4404783242","https://openalex.org/W6600949241","https://openalex.org/W6640425456","https://openalex.org/W6674701899","https://openalex.org/W6675944832","https://openalex.org/W6676568861","https://openalex.org/W6678975374","https://openalex.org/W6679436768","https://openalex.org/W6680110197","https://openalex.org/W6684191040","https://openalex.org/W6684702499","https://openalex.org/W6685053522","https://openalex.org/W6690725250","https://openalex.org/W6728698659","https://openalex.org/W6730161283","https://openalex.org/W6733814495","https://openalex.org/W6734716764","https://openalex.org/W6734807902","https://openalex.org/W6743428213","https://openalex.org/W6745136726","https://openalex.org/W6745388339","https://openalex.org/W6745847742","https://openalex.org/W6746141323","https://openalex.org/W6752876022","https://openalex.org/W6755207826","https://openalex.org/W6759455113","https://openalex.org/W6760456230","https://openalex.org/W6762913911","https://openalex.org/W6764051988","https://openalex.org/W6765544429","https://openalex.org/W6765939562","https://openalex.org/W6768222176","https://openalex.org/W6769627184","https://openalex.org/W6773005947","https://openalex.org/W6773944720","https://openalex.org/W6774569510","https://openalex.org/W6774932648","https://openalex.org/W6778738830","https://openalex.org/W6778883912","https://openalex.org/W6782125931","https://openalex.org/W6783973398","https://openalex.org/W6801486040","https://openalex.org/W6811004012"],"related_works":["https://openalex.org/W2970530566","https://openalex.org/W2967478618","https://openalex.org/W2997152889","https://openalex.org/W4385572700","https://openalex.org/W4388335561","https://openalex.org/W4307309205","https://openalex.org/W4288261899","https://openalex.org/W4385009901","https://openalex.org/W4285141722","https://openalex.org/W4387768015"],"abstract_inverted_index":{"Abstract":[0],"NLP":[1,26,81,114],"has":[2,71],"achieved":[3],"great":[4],"progress":[5,109],"in":[6,66,82,96,115,167,182],"the":[7,11,83,116,122,155,172],"past":[8],"decade":[9],"through":[10],"use":[12],"of":[13,49,62,77,107,124],"neural":[14],"models":[15,27],"and":[16,133,136,150,170,175],"large":[17],"labeled":[18,85,118],"datasets.":[19],"The":[20],"dependence":[21],"on":[22,110,140,154],"abundant":[23],"data":[24,53,64,78,86,111,119,180],"prevents":[25],"from":[28],"being":[29],"applied":[30],"to":[31,45,91,161],"low-resource":[32],"settings":[33,169],"or":[34,41],"novel":[35],"tasks":[36],"where":[37],"significant":[38],"time,":[39],"money,":[40],"expertise":[42],"is":[43],"required":[44],"label":[46],"massive":[47],"amounts":[48],"textual":[50],"data.":[51],"Recently,":[52],"augmentation":[54,79,112],"methods":[55,94,125],"have":[56],"been":[57,72],"explored":[58],"as":[59],"a":[60],"means":[61],"improving":[63],"efficiency":[65],"NLP.":[67,183],"To":[68],"date,":[69],"there":[70],"no":[73],"systematic":[74],"empirical":[75,105],"overview":[76],"for":[80,113,178],"limited":[84,117,179],"setting,":[87,120],"making":[88],"it":[89],"difficult":[90],"understand":[92],"which":[93,97],"work":[95],"settings.":[98],"In":[99],"this":[100],"paper,":[101],"we":[102,157],"provide":[103],"an":[104],"survey":[106],"recent":[108],"summarizing":[121],"landscape":[123],"(including":[126],"token-level":[127],"augmentations,":[128,130,132],"sentence-level":[129],"adversarial":[131],"hidden-space":[134],"augmentations)":[135],"carrying":[137],"out":[138],"experiments":[139],"11":[141],"datasets":[142],"covering":[143],"topics/news":[144],"classification,":[145],"inference":[146],"tasks,":[147,149],"paraphrasing":[148],"single-sentence":[151],"tasks.":[152],"Based":[153],"results,":[156],"draw":[158],"several":[159],"conclusions":[160],"help":[162],"practitioners":[163],"choose":[164],"appropriate":[165],"augmentations":[166],"different":[168],"discuss":[171],"current":[173],"challenges":[174],"future":[176],"directions":[177],"learning":[181]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":51},{"year":2024,"cited_by_count":39},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":18},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
