{"id":"https://openalex.org/W3199885604","doi":"https://doi.org/10.3233/faia210036","title":"An Empirical Investigation of Online News Classification on an Open-Domain, Large-Scale and High-Quality Dataset in Vietnamese","display_name":"An Empirical Investigation of Online News Classification on an Open-Domain, Large-Scale and High-Quality Dataset in Vietnamese","publication_year":2021,"publication_date":"2021-09-08","ids":{"openalex":"https://openalex.org/W3199885604","doi":"https://doi.org/10.3233/faia210036","mag":"3199885604"},"language":"en","primary_location":{"id":"doi:10.3233/faia210036","is_oa":false,"landing_page_url":"https://doi.org/10.3233/faia210036","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015913850","display_name":"Khanh Quoc Tran","orcid":"https://orcid.org/0000-0003-1288-8003"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Khanh Quoc Tran","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]},{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061629917","display_name":"Phap Ngoc Trinh","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Phap Ngoc Trinh","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]},{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030909759","display_name":"Khoa Nguyen-Anh Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Khoa Nguyen-Anh Tran","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]},{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074242059","display_name":"An Tran-Hoai Le","orcid":"https://orcid.org/0000-0002-0521-963X"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"An Tran-Hoai Le","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]},{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010753496","display_name":"Luan Van Ha","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Luan Van Ha","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]},{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam; Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046262541","display_name":"Kiet Van Nguyen","orcid":"https://orcid.org/0000-0002-8456-2742"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Kiet Van Nguyen","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh city, Vietnam","Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh city, Vietnam","institution_ids":[]},{"raw_affiliation_string":"Vietnam National University, Ho Chi Minh City, Vietnam Email: 18520908@gm.uit.edu.vn, 18521227@gm.uit.edu.vn, 8520938@gm.uit.edu.vn, 18520426@gm.uit.edu.vn, 8521062@gm.uit.edu.vn, kietnv@uit.edu.vn","institution_ids":["https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5015913850"],"corresponding_institution_ids":["https://openalex.org/I123565023"],"apc_list":null,"apc_paid":null,"fwci":0.344,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57592892,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8014092445373535},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.7767711281776428},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6576653122901917},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6454477906227112},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5799513459205627},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4932815432548523},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33613014221191406},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.2701307535171509}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8014092445373535},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.7767711281776428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6576653122901917},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6454477906227112},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5799513459205627},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4932815432548523},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33613014221191406},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2701307535171509},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia210036","is_oa":false,"landing_page_url":"https://doi.org/10.3233/faia210036","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2901286616","https://openalex.org/W3113733647","https://openalex.org/W4206924063","https://openalex.org/W2901718966","https://openalex.org/W3042674643","https://openalex.org/W3022757400","https://openalex.org/W1989494794","https://openalex.org/W2122031327","https://openalex.org/W1184238669","https://openalex.org/W3032901101"],"abstract_inverted_index":{"In":[0,33,59,146],"this":[1],"paper,":[2],"we":[3,35,61,148],"build":[4],"a":[5,95,107,111],"new":[6],"dataset":[7,42,158],"UIT-ViON":[8],"(Vietnamese":[9],"Online":[10],"Newspaper)":[11],"collected":[12],"from":[13],"well-known":[14],"online":[15],"newspapers":[16],"in":[17],"Vietnamese.":[18],"We":[19],"collect,":[20],"process,":[21],"and":[22,40,85,88,92,110,129,132,159],"create":[23],"the":[24,63,76,97,101,154,157,161],"dataset,":[25,77],"then":[26],"experiment":[27],"with":[28,50],"different":[29],"machine":[30,81],"learning":[31,68,82,90],"models.":[32,165],"particular,":[34],"propose":[36,150],"an":[37,115],"open-domain,":[38],"large-scale,":[39],"high-quality":[41],"consisting":[43],"of":[44,103,156,163],"260,000":[45],"textual":[46],"data":[47],"points":[48],"annotated":[49],"multiple":[51],"labels":[52],"for":[53,70,113,136],"evaluating":[54],"Vietnamese":[55,71],"short":[56,72],"text":[57,73],"classification.":[58],"addition,":[60],"present":[62],"proposed":[64,98,123],"approach":[65,99],"using":[66],"transformer-based":[67],"(PhoBERT)":[69],"classification":[74,118,164],"on":[75],"which":[78],"outperforms":[79],"traditional":[80],"(Naive":[83],"Bayes":[84],"Logistic":[86],"Regression)":[87],"deep":[89],"(Text-CNN":[91],"LSTM).":[93],"As":[94],"result,":[96],"achieves":[100],"F1-score":[102],"80.62%.":[104],"This":[105],"is":[106,122],"positive":[108],"result":[109],"premise":[112],"developing":[114],"automatic":[116],"news":[117,140],"system.":[119],"The":[120],"study":[121],"to":[124,138,142,152],"significantly":[125],"save":[126],"time,":[127],"costs,":[128],"human":[130],"resources":[131],"make":[133],"it":[134],"easier":[135],"readers":[137],"find":[139],"related":[141],"their":[143],"interesting":[144],"topics.":[145],"future,":[147],"will":[149],"solutions":[151],"improve":[153,160],"quality":[155],"performance":[162]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
