{"id":"https://openalex.org/W4319984996","doi":"https://doi.org/10.1080/08839514.2023.2175112","title":"A study on the evaluation of tokenizer performance in natural language processing","display_name":"A study on the evaluation of tokenizer performance in natural language processing","publication_year":2023,"publication_date":"2023-02-09","ids":{"openalex":"https://openalex.org/W4319984996","doi":"https://doi.org/10.1080/08839514.2023.2175112"},"language":"en","primary_location":{"id":"doi:10.1080/08839514.2023.2175112","is_oa":true,"landing_page_url":"https://doi.org/10.1080/08839514.2023.2175112","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/08839514.2023.2175112?download=true","source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.tandfonline.com/doi/pdf/10.1080/08839514.2023.2175112?download=true","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034106271","display_name":"Sanghyun Choo","orcid":"https://orcid.org/0000-0002-8884-3437"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanghyun Choo","raw_affiliation_strings":["Edward P. Fitts Department of Industrial and Systems Engineering, North Carolina State University, Raleigh, NC, USA"],"raw_orcid":"https://orcid.org/0000-0002-8884-3437","affiliations":[{"raw_affiliation_string":"Edward P. Fitts Department of Industrial and Systems Engineering, North Carolina State University, Raleigh, NC, USA","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038180509","display_name":"Wonjoon Kim","orcid":"https://orcid.org/0000-0001-5177-8072"},"institutions":[{"id":"https://openalex.org/I110217816","display_name":"Dongduk Women's University","ror":"https://ror.org/039p7ck60","country_code":"KR","type":"education","lineage":["https://openalex.org/I110217816"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Wonjoon Kim","raw_affiliation_strings":["Division of Future Convergence (HCI Science Major), Dongduk Women\u2019s University, Seoul, South Korea","Division of Future Convergence (HCI Science Major), Dongduk Women's University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-5177-8072","affiliations":[{"raw_affiliation_string":"Division of Future Convergence (HCI Science Major), Dongduk Women\u2019s University, Seoul, South Korea","institution_ids":["https://openalex.org/I110217816"]},{"raw_affiliation_string":"Division of Future Convergence (HCI Science Major), Dongduk Women's University, Seoul, South Korea","institution_ids":["https://openalex.org/I110217816"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038180509"],"corresponding_institution_ids":["https://openalex.org/I110217816"],"apc_list":{"value":2195,"currency":"USD","value_usd":2195},"apc_paid":{"value":2195,"currency":"USD","value_usd":2195},"fwci":7.7255,"has_fulltext":true,"cited_by_count":47,"citation_normalized_percentile":{"value":0.97974457,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"37","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.95660001039505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9065290689468384},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5275048017501831},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.44402652978897095},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.411737859249115},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.390823632478714},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3257400393486023}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9065290689468384},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5275048017501831},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.44402652978897095},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.411737859249115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.390823632478714},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3257400393486023},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1080/08839514.2023.2175112","is_oa":true,"landing_page_url":"https://doi.org/10.1080/08839514.2023.2175112","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/08839514.2023.2175112?download=true","source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6b889d8e4f914ec8afc02c3d29c98834","is_oa":true,"landing_page_url":"https://doaj.org/article/6b889d8e4f914ec8afc02c3d29c98834","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Applied Artificial Intelligence, Vol 37, Iss 1 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1080/08839514.2023.2175112","is_oa":true,"landing_page_url":"https://doi.org/10.1080/08839514.2023.2175112","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/08839514.2023.2175112?download=true","source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5614650948","display_name":null,"funder_award_id":"2020R1G1A1003384","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4319984996.pdf","grobid_xml":"https://content.openalex.org/works/W4319984996.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W43382659","https://openalex.org/W1523349167","https://openalex.org/W1572786359","https://openalex.org/W1598796236","https://openalex.org/W1993879187","https://openalex.org/W2039429194","https://openalex.org/W2064675550","https://openalex.org/W2077160697","https://openalex.org/W2095112907","https://openalex.org/W2106998212","https://openalex.org/W2172699681","https://openalex.org/W2612769033","https://openalex.org/W2741043385","https://openalex.org/W2756280475","https://openalex.org/W2790025829","https://openalex.org/W2801831183","https://openalex.org/W2809807853","https://openalex.org/W2810931684","https://openalex.org/W2887379829","https://openalex.org/W2888784229","https://openalex.org/W2892072280","https://openalex.org/W2899555336","https://openalex.org/W2911494872","https://openalex.org/W2918429901","https://openalex.org/W2937269099","https://openalex.org/W2952230511","https://openalex.org/W2972982672","https://openalex.org/W2990188683","https://openalex.org/W2998272617","https://openalex.org/W2999913064","https://openalex.org/W3003618396","https://openalex.org/W3004070643","https://openalex.org/W3017952061","https://openalex.org/W3022458612","https://openalex.org/W3028754898","https://openalex.org/W3058507927","https://openalex.org/W3081987387","https://openalex.org/W3099520540","https://openalex.org/W3111704875","https://openalex.org/W3115865922","https://openalex.org/W3123035962","https://openalex.org/W3173414368","https://openalex.org/W3198021170","https://openalex.org/W3201170131","https://openalex.org/W4200000026","https://openalex.org/W4200579154","https://openalex.org/W4207025902","https://openalex.org/W4224283955","https://openalex.org/W4289677850","https://openalex.org/W4294311165","https://openalex.org/W4294690845","https://openalex.org/W4296764940","https://openalex.org/W4310507698","https://openalex.org/W6683738474"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4283262748","https://openalex.org/W2502773048","https://openalex.org/W4252362398","https://openalex.org/W48866389","https://openalex.org/W2117651364","https://openalex.org/W3204019825","https://openalex.org/W4226226396","https://openalex.org/W3153750606","https://openalex.org/W4308854837"],"abstract_inverted_index":{"The":[0,26,66,84,108],"present":[1],"study":[2,27,109],"aims":[3],"to":[4,59,127,142],"compare":[5],"and":[6,14,50,82,123],"analyze":[7],"the":[8,17,61,77,95,98,105,119],"performance":[9,62,67],"of":[10,19,63,97,121],"two":[11],"tokenizers,":[12],"Mecab-Ko":[13],"SentencePiece,":[15],"in":[16,76,118,149],"context":[18,120],"natural":[20],"language":[21],"processing":[22],"for":[23],"sentiment":[24,130],"analysis.":[25],"adopts":[28],"a":[29],"comparative":[30],"approach,":[31],"employing":[32],"five":[33],"algorithms":[34],"-":[35,58],"Naive":[36],"Bayes":[37],"(NB),":[38],"k-Nearest":[39],"Neighbor":[40],"(kNN),":[41],"Support":[42],"Vector":[43],"Machine":[44],"(SVM),":[45],"Artificial":[46],"Neural":[47,55],"Networks":[48,56],"(ANN),":[49],"Long":[51],"Short-Term":[52],"Memory":[53],"Recurrent":[54],"(LSTM-RNN)":[57],"evaluate":[60],"each":[64],"tokenizer.":[65],"was":[68],"assessed":[69],"based":[70,131],"on":[71,104,132],"four":[72],"widely":[73],"used":[74,126,148],"metrics":[75],"field,":[78],"accuracy,":[79],"precision,":[80],"recall,":[81],"F1-score.":[83],"results":[85],"indicated":[86],"that":[87,111],"SentencePiece":[88,112,137],"performed":[89],"better":[90],"than":[91],"Mecab-Ko.":[92],"To":[93],"ensure":[94],"validity":[96],"results,":[99],"paired":[100],"t-tests":[101],"were":[102],"conducted":[103],"evaluation":[106],"outcomes.":[107],"concludes":[110],"demonstrated":[113],"superior":[114],"classification":[115],"performance,":[116],"especially":[117],"ANN":[122],"LSTM-RNN,":[124],"when":[125],"interpret":[128],"customer":[129],"Korean":[133],"online":[134],"reviews.":[135],"Furthermore,":[136],"can":[138],"assign":[139],"specific":[140],"meanings":[141],"short":[143],"words":[144],"or":[145],"jargon":[146],"commonly":[147],"product":[150],"evaluations":[151],"but":[152],"not":[153],"defined":[154],"beforehand.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5}],"updated_date":"2026-06-03T09:05:47.796612","created_date":"2025-10-10T00:00:00"}
