{"id":"https://openalex.org/W4288388952","doi":"https://doi.org/10.1109/jcsse54890.2022.9836268","title":"Tokenization-based data augmentation for text classification","display_name":"Tokenization-based data augmentation for text classification","publication_year":2022,"publication_date":"2022-06-22","ids":{"openalex":"https://openalex.org/W4288388952","doi":"https://doi.org/10.1109/jcsse54890.2022.9836268"},"language":"en","primary_location":{"id":"doi:10.1109/jcsse54890.2022.9836268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcsse54890.2022.9836268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 19th International Joint Conference on Computer Science and Software Engineering (JCSSE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071711018","display_name":"Patawee Prakrankamanant","orcid":null},"institutions":[{"id":"https://openalex.org/I158708052","display_name":"Chulalongkorn University","ror":"https://ror.org/028wp3y58","country_code":"TH","type":"education","lineage":["https://openalex.org/I158708052"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Patawee Prakrankamanant","raw_affiliation_strings":["Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand","Department of Computer Engineering, Chulalongkorn University, Bangkok, Thailand"],"affiliations":[{"raw_affiliation_string":"Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand","institution_ids":["https://openalex.org/I158708052"]},{"raw_affiliation_string":"Department of Computer Engineering, Chulalongkorn University, Bangkok, Thailand","institution_ids":["https://openalex.org/I158708052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030009288","display_name":"Ekapol Chuangsuwanich","orcid":"https://orcid.org/0000-0001-6104-4857"},"institutions":[{"id":"https://openalex.org/I158708052","display_name":"Chulalongkorn University","ror":"https://ror.org/028wp3y58","country_code":"TH","type":"education","lineage":["https://openalex.org/I158708052"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Ekapol Chuangsuwanich","raw_affiliation_strings":["Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand","Department of Computer Engineering, Chulalongkorn University, Bangkok, Thailand"],"affiliations":[{"raw_affiliation_string":"Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand","institution_ids":["https://openalex.org/I158708052"]},{"raw_affiliation_string":"Department of Computer Engineering, Chulalongkorn University, Bangkok, Thailand","institution_ids":["https://openalex.org/I158708052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5071711018"],"corresponding_institution_ids":["https://openalex.org/I158708052"],"apc_list":null,"apc_paid":null,"fwci":0.9283,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.78593559,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.9576854705810547},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8507940769195557},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7326876521110535},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6123282313346863},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5812654495239258},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5139167308807373},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46520668268203735},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06967124342918396}],"concepts":[{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.9576854705810547},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8507940769195557},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7326876521110535},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6123282313346863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5812654495239258},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5139167308807373},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46520668268203735},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06967124342918396},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jcsse54890.2022.9836268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcsse54890.2022.9836268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 19th International Joint Conference on Computer Science and Software Engineering (JCSSE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W1832693441","https://openalex.org/W1904365287","https://openalex.org/W2095705004","https://openalex.org/W2117364574","https://openalex.org/W2118585731","https://openalex.org/W2136528950","https://openalex.org/W2170240176","https://openalex.org/W2250473257","https://openalex.org/W2501941158","https://openalex.org/W2511239188","https://openalex.org/W2525778437","https://openalex.org/W2689696018","https://openalex.org/W2785845370","https://openalex.org/W2896457183","https://openalex.org/W2905266130","https://openalex.org/W2952524414","https://openalex.org/W2962784628","https://openalex.org/W2963026768","https://openalex.org/W2963250244","https://openalex.org/W2963626623","https://openalex.org/W2963658982","https://openalex.org/W2963979492","https://openalex.org/W2964529779","https://openalex.org/W2965373594","https://openalex.org/W2987479080","https://openalex.org/W3014673391","https://openalex.org/W3034238904","https://openalex.org/W3037045905","https://openalex.org/W3043962629","https://openalex.org/W3094132514","https://openalex.org/W3205367839","https://openalex.org/W4287370843","https://openalex.org/W4288023028","https://openalex.org/W4294536576","https://openalex.org/W6636510571","https://openalex.org/W6638575559","https://openalex.org/W6640036494","https://openalex.org/W6674330103","https://openalex.org/W6677656871","https://openalex.org/W6685053522","https://openalex.org/W6727690538","https://openalex.org/W6736731929","https://openalex.org/W6755207826","https://openalex.org/W6766673545","https://openalex.org/W6769477329","https://openalex.org/W6775473433","https://openalex.org/W6784754156","https://openalex.org/W6790219574"],"related_works":["https://openalex.org/W4292346028","https://openalex.org/W2725310424","https://openalex.org/W4282591925","https://openalex.org/W2963023579","https://openalex.org/W2601638452","https://openalex.org/W2766198569","https://openalex.org/W4300598845","https://openalex.org/W2285263069","https://openalex.org/W4319309671","https://openalex.org/W4376107815"],"abstract_inverted_index":{"Tokenization":[0],"is":[1,31,36,39],"one":[2,17],"of":[3,18,64,75],"the":[4,11,19,24,34,62,73,91],"most":[5],"important":[6],"data":[7,57,105],"preprocessing":[8],"steps":[9],"in":[10,23],"text":[12,81],"classification":[13,82],"task":[14],"and":[15,38,98],"also":[16],"main":[20],"contributing":[21],"factors":[22],"model":[25,92],"performance.":[26],"However,":[27],"getting":[28],"good":[29],"tokenizations":[30],"non-trivial":[32],"when":[33],"input":[35],"noisy,":[37],"especially":[40],"problematic":[41],"for":[42],"languages":[43],"without":[44],"an":[45,55],"explicit":[46],"word":[47],"delimiter":[48],"such":[49],"as":[50],"Thai.":[51],"Therefore,":[52],"we":[53],"propose":[54],"alternative":[56],"augmentation":[58,88,106],"method":[59],"to":[60,95],"improve":[61],"robustness":[63],"poor":[65],"tokenization":[66,96],"by":[67],"using":[68],"multiple":[69],"tokenizations.":[70],"We":[71],"evaluate":[72],"performance":[74],"our":[76,87],"algorithms":[77],"on":[78],"different":[79],"Thai":[80],"datasets.":[83],"The":[84],"results":[85],"suggest":[86],"scheme":[89],"makes":[90],"more":[93],"robust":[94],"errors":[97],"can":[99],"be":[100],"combined":[101],"well":[102],"with":[103],"other":[104],"schemes.":[107]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
