{"id":"https://openalex.org/W2604152035","doi":"https://doi.org/10.1142/s021848851750009x","title":"The Hybrid Filter Feature Selection Methods for Improving High-Dimensional Text Categorization","display_name":"The Hybrid Filter Feature Selection Methods for Improving High-Dimensional Text Categorization","publication_year":2017,"publication_date":"2017-04-01","ids":{"openalex":"https://openalex.org/W2604152035","doi":"https://doi.org/10.1142/s021848851750009x","mag":"2604152035"},"language":"en","primary_location":{"id":"doi:10.1142/s021848851750009x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021848851750009x","pdf_url":null,"source":{"id":"https://openalex.org/S69518169","display_name":"International Journal of Uncertainty Fuzziness and Knowledge-Based Systems","issn_l":"0218-4885","issn":["0218-4885","1793-6411"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069599417","display_name":"Le Nguyen Hoai Nam","orcid":"https://orcid.org/0000-0001-9675-2191"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]},{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Le Nguyen Hoai Nam","raw_affiliation_strings":["Department of Information System, The School of Information Technology, VNUHCM \u2013 the University of Science, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Department of Information System, The School of Information Technology, VNUHCM \u2013 the University of Science, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085078415","display_name":"Ho Bao Quoc","orcid":null},"institutions":[{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]},{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Ho Bao Quoc","raw_affiliation_strings":["Department of Information System, The School of Information Technology, VNUHCM \u2013 the University of Science, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Department of Information System, The School of Information Technology, VNUHCM \u2013 the University of Science, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5069599417"],"corresponding_institution_ids":["https://openalex.org/I123565023","https://openalex.org/I23582244"],"apc_list":null,"apc_paid":null,"fwci":0.7801,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.78420284,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"25","issue":"02","first_page":"235","last_page":"265"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7903938293457031},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6953794956207275},{"id":"https://openalex.org/keywords/centroid","display_name":"Centroid","score":0.6883217096328735},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6103650331497192},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5856471657752991},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5813945531845093},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5778681039810181},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5522550940513611},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5330578088760376},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4944005012512207},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.486814945936203},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4739135503768921},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.452939510345459},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.4294235408306122}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7903938293457031},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6953794956207275},{"id":"https://openalex.org/C146599234","wikidata":"https://www.wikidata.org/wiki/Q511093","display_name":"Centroid","level":2,"score":0.6883217096328735},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6103650331497192},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5856471657752991},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5813945531845093},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5778681039810181},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5522550940513611},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5330578088760376},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4944005012512207},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.486814945936203},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4739135503768921},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.452939510345459},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.4294235408306122},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s021848851750009x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021848851750009x","pdf_url":null,"source":{"id":"https://openalex.org/S69518169","display_name":"International Journal of Uncertainty Fuzziness and Knowledge-Based Systems","issn_l":"0218-4885","issn":["0218-4885","1793-6411"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5299999713897705}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1982589161","https://openalex.org/W1999635750","https://openalex.org/W2002352636","https://openalex.org/W2010836425","https://openalex.org/W2013390089","https://openalex.org/W2049736842","https://openalex.org/W2069602768","https://openalex.org/W2090702536","https://openalex.org/W2091669653","https://openalex.org/W2093452304","https://openalex.org/W2098162425","https://openalex.org/W2098888151","https://openalex.org/W2108150588","https://openalex.org/W2134090438","https://openalex.org/W2135847698","https://openalex.org/W2148511931","https://openalex.org/W2162223169","https://openalex.org/W4236137412","https://openalex.org/W4247582552"],"related_works":["https://openalex.org/W2375828317","https://openalex.org/W2120026622","https://openalex.org/W2886616187","https://openalex.org/W2391010859","https://openalex.org/W2385233088","https://openalex.org/W2053724255","https://openalex.org/W2048060766","https://openalex.org/W2066259560","https://openalex.org/W2355203151","https://openalex.org/W2772780115"],"abstract_inverted_index":{"The":[0,128],"bag-of-words":[1,25,62],"technique":[2],"is":[3,27,130,150,245],"often":[4],"used":[5],"to":[6,91,114,185,247,260],"present":[7],"a":[8,15,33,37,131,144,151],"document":[9,176],"in":[10,125],"text":[11,30,126],"categorization.":[12,127],"However,":[13],"for":[14,168,174,182,265],"large":[16],"set":[17],"of":[18,23,39,54,75,119,133,147,233],"documents":[19],"where":[20],"the":[21,24,52,61,69,83,87,92,102,108,117,120,134,138,142,148,171,179,187,197,203,210,216,220,224,234,236,239,243,248,252,261,266],"dimension":[22,240],"vector":[26],"very":[28],"high,":[29],"categorization":[31],"becomes":[32],"serious":[34],"challenge":[35],"as":[36,196],"result":[38],"sparse":[40],"data,":[41],"over-fitting,":[42],"and":[43,72,86,107,137,165,178,223,238,256],"irrelevant":[44,58],"features.":[45],"A":[46],"filter":[47,77,97,121],"feature":[48,78,98,122],"selection":[49,79,99,123],"method":[50],"reduces":[51],"number":[53],"features":[55,59],"by":[56],"eliminating":[57],"from":[60,226],"vector.":[63],"In":[64,231],"this":[65],"paper,":[66],"we":[67,94],"analyze":[68],"weak":[70],"points":[71,74],"strong":[73],"two":[76],"approaches":[80],"which":[81],"are":[82],"frequency-based":[84],"approach":[85,136],"cluster-based":[88],"approach.":[89],"Thanks":[90],"analysis,":[93],"propose":[95],"hybrid":[96,153],"methods,":[100,250,263],"named":[101],"Frequency-Cluster":[103,110],"Feature":[104,111,200,207,213,228],"Selection":[105,112,201,208,214,229],"(FCFS)":[106],"Detailed":[109],"(DtFCFS),":[113],"further":[115],"improve":[116],"performance":[118,259],"process":[124],"FCFS":[129,253],"combination":[132],"Frequency-based":[135],"Cluster-based":[139],"approach,":[140],"while":[141,251],"DtFCFS,":[143],"detailed":[145],"version":[146],"FCFS,":[149],"comprehensively":[152],"clusterbased":[154],"method.":[155],"We":[156],"do":[157],"experiments":[158],"with":[159,190],"four":[160],"benchmark":[161],"datasets":[162],"(the":[163],"Reuters-21578":[164],"Newsgroup":[166],"dataset":[167,173,181],"news":[169],"classification,":[170,177],"Ohsumed":[172],"medical":[175],"LingSpam":[180],"email":[183],"classification)":[184],"compare":[186],"proposed":[188],"methods":[189,194],"six":[191],"related":[192],"wellknown":[193],"such":[195],"Comprehensive":[198],"Measurement":[199],"(CMFS),":[202],"Optimal":[204],"Orthogonal":[205],"Centroid":[206,212],"(OCFS),":[209],"Crossed":[211],"(CIIC),":[215],"Information":[217],"Gain":[218],"(IG),":[219],"Chi-square":[221],"(CHI),":[222],"Deviation":[225],"Poisson":[227],"(DFPFS).":[230],"terms":[232],"Micro-F1,":[235],"Macro-F1,":[237],"reduction":[241],"rate,":[242],"DtFCFS":[244],"superior":[246,258],"other":[249],"shows":[254],"competitive":[255],"even":[257],"good":[262],"especially":[264],"Macro-F1.":[267]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
