{"id":"https://openalex.org/W4386165948","doi":"https://doi.org/10.1017/s1351324923000438","title":"Improving short text classification with augmented data using GPT-3","display_name":"Improving short text classification with augmented data using GPT-3","publication_year":2023,"publication_date":"2023-08-25","ids":{"openalex":"https://openalex.org/W4386165948","doi":"https://doi.org/10.1017/s1351324923000438"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324923000438","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324923000438","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/4F23066E3F0156382190BD76DA9A7BA5/S1351324923000438a.pdf/div-class-title-improving-short-text-classification-with-augmented-data-using-gpt-3-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/4F23066E3F0156382190BD76DA9A7BA5/S1351324923000438a.pdf/div-class-title-improving-short-text-classification-with-augmented-data-using-gpt-3-div.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032739207","display_name":"Salvador V. Balkus","orcid":"https://orcid.org/0000-0003-4695-833X"},"institutions":[{"id":"https://openalex.org/I100633361","display_name":"University of Massachusetts Dartmouth","ror":"https://ror.org/00fzmm222","country_code":"US","type":"education","lineage":["https://openalex.org/I100633361"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Salvador V. Balkus","raw_affiliation_strings":["Program in Data Science, University of Massachusetts Dartmouth, Dartmouth, MA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4695-833X","affiliations":[{"raw_affiliation_string":"Program in Data Science, University of Massachusetts Dartmouth, Dartmouth, MA, USA","institution_ids":["https://openalex.org/I100633361"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060954578","display_name":"Donghui Yan","orcid":"https://orcid.org/0000-0002-5131-1509"},"institutions":[{"id":"https://openalex.org/I100633361","display_name":"University of Massachusetts Dartmouth","ror":"https://ror.org/00fzmm222","country_code":"US","type":"education","lineage":["https://openalex.org/I100633361"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Donghui Yan","raw_affiliation_strings":["Department of Mathematics, University of Massachusetts Dartmouth, Dartmouth, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mathematics, University of Massachusetts Dartmouth, Dartmouth, MA, USA","institution_ids":["https://openalex.org/I100633361"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5032739207"],"corresponding_institution_ids":["https://openalex.org/I100633361"],"apc_list":null,"apc_paid":null,"fwci":10.2185,"has_fulltext":true,"cited_by_count":64,"citation_normalized_percentile":{"value":0.98712042,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"30","issue":"5","first_page":"943","last_page":"972"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8694098591804504},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6022756099700928},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5837292671203613},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5788636803627014},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5751012563705444},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5620013475418091},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5600385665893555},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5409314632415771},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5325345993041992},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.446443110704422},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.4276259243488312},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34125614166259766},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3277815580368042},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.2719373404979706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8694098591804504},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6022756099700928},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5837292671203613},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5788636803627014},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5751012563705444},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5620013475418091},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5600385665893555},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5409314632415771},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5325345993041992},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.446443110704422},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.4276259243488312},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34125614166259766},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3277815580368042},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2719373404979706},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324923000438","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324923000438","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/4F23066E3F0156382190BD76DA9A7BA5/S1351324923000438a.pdf/div-class-title-improving-short-text-classification-with-augmented-data-using-gpt-3-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1017/s1351324923000438","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324923000438","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/4F23066E3F0156382190BD76DA9A7BA5/S1351324923000438a.pdf/div-class-title-improving-short-text-classification-with-augmented-data-using-gpt-3-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8600000143051147}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310400","display_name":"Dartmouth College","ror":"https://ror.org/049s0rh22"},{"id":"https://openalex.org/F4320315739","display_name":"University of Massachusetts Dartmouth","ror":"https://ror.org/00fzmm222"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386165948.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1634625917","https://openalex.org/W1984762104","https://openalex.org/W2022637272","https://openalex.org/W2165698076","https://openalex.org/W2317515691","https://openalex.org/W2408246687","https://openalex.org/W2593914038","https://openalex.org/W2800318991","https://openalex.org/W2883158411","https://openalex.org/W2883387735","https://openalex.org/W2900065283","https://openalex.org/W2919821184","https://openalex.org/W2937423263","https://openalex.org/W2943552823","https://openalex.org/W2954996726","https://openalex.org/W2963216553","https://openalex.org/W2979750067","https://openalex.org/W2998184481","https://openalex.org/W3010293452","https://openalex.org/W3011995931","https://openalex.org/W3046441874","https://openalex.org/W3094704314","https://openalex.org/W3100789280","https://openalex.org/W3105190746","https://openalex.org/W3105625590","https://openalex.org/W3106171756","https://openalex.org/W3112103703","https://openalex.org/W3156333129","https://openalex.org/W3179950556","https://openalex.org/W3183859557","https://openalex.org/W4285178284","https://openalex.org/W4288089799","https://openalex.org/W4289828103","https://openalex.org/W4304701418","https://openalex.org/W4385573478","https://openalex.org/W6769627184","https://openalex.org/W6781051561"],"related_works":["https://openalex.org/W1987706094","https://openalex.org/W2250717533","https://openalex.org/W3193088696","https://openalex.org/W3132346564","https://openalex.org/W2977842567","https://openalex.org/W2250140425","https://openalex.org/W4381430104","https://openalex.org/W2734587838","https://openalex.org/W4226059458","https://openalex.org/W3201070945"],"abstract_inverted_index":{"Abstract":[0],"GPT-3":[1,39,66,88,157],"is":[2,72],"a":[3,27,35,51,70,79,115],"large-scale":[4],"natural":[5],"language":[6,154],"model":[7],"developed":[8],"by":[9,57,77,87],"OpenAI":[10],"that":[11,23,120,131],"can":[12,166],"perform":[13],"many":[14],"different":[15],"tasks,":[16],"including":[17],"topic":[18],"classification.":[19],"Although":[20],"researchers":[21],"claim":[22],"it":[24],"requires":[25,40],"only":[26],"small":[28,80],"number":[29],"of":[30,47,127,140,147],"in-context":[31],"examples":[32,43,85,165],"to":[33,44,67,74,144,160],"learn":[34],"task,":[36],"in":[37],"practice":[38],"these":[41],"training":[42,81,102,164],"be":[45],"either":[46],"exceptional":[48],"quality":[49],"or":[50],"higher":[52],"quantity":[53],"than":[54],"easily":[55],"created":[56],"hand.":[58],"To":[59],"address":[60],"this":[61,63,150],"issue,":[62],"study":[64,91],"teaches":[65],"classify":[68],"whether":[69],"question":[71],"related":[73],"data":[75,121],"science":[76],"augmenting":[78],"set":[82,103],"with":[83,99,109],"additional":[84],"generated":[86],"itself.":[89],"This":[90],"compares":[92],"two":[93],"augmented":[94,111],"classifiers:":[95],"the":[96,106,125,132,137,158],"Classification":[97,134],"Endpoint":[98,108,135],"an":[100,110],"increased":[101],"size":[104],"and":[105,130],"Completion":[107],"prompt":[112],"optimized":[113],"using":[114],"genetic":[116],"algorithm.":[117],"We":[118],"find":[119],"augmentation":[122],"significantly":[123],"increases":[124],"accuracy":[126,139,146],"both":[128],"classifiers,":[129],"embedding-based":[133],"achieves":[136],"best":[138],"about":[141],"76%,":[142],"compared":[143],"human":[145],"85%.":[148],"In":[149],"way,":[151],"giving":[152],"large":[153],"models":[155],"like":[156],"ability":[159],"propose":[161],"their":[162],"own":[163],"improve":[167],"short":[168],"text":[169],"classification":[170],"performance.":[171]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":35},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-18T10:00:31.954636","created_date":"2025-10-10T00:00:00"}
