{"id":"https://openalex.org/W1537064529","doi":"https://doi.org/10.1109/ijcnn.2005.1556452","title":"Information-theoretic feature selection algorithms for text classification","display_name":"Information-theoretic feature selection algorithms for text classification","publication_year":2006,"publication_date":"2006-01-05","ids":{"openalex":"https://openalex.org/W1537064529","doi":"https://doi.org/10.1109/ijcnn.2005.1556452","mag":"1537064529"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2005.1556452","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2005.1556452","pdf_url":null,"source":{"id":"https://openalex.org/S4363609022","display_name":"Proceedings. 2005 IEEE International Joint Conference on Neural Networks, 2005.","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. 2005 IEEE International Joint Conference on Neural Networks, 2005.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5097581813","display_name":"J. Novovicovai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119419","display_name":"Czech Academy of Sciences, Institute of Information Theory and Automation","ror":"https://ror.org/03h1hsz49","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210119419"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"J. Novovicovai","raw_affiliation_strings":["Inst. of Inf. Theor. & Autom., Acad. of Sci. of the Czech Republic, Prague, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Inst. of Inf. Theor. & Autom., Acad. of Sci. of the Czech Republic, Prague, Czech Republic","institution_ids":["https://openalex.org/I4210119419"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015352281","display_name":"Adnan Malik","orcid":"https://orcid.org/0000-0002-6034-0117"},"institutions":[{"id":"https://openalex.org/I4210119419","display_name":"Czech Academy of Sciences, Institute of Information Theory and Automation","ror":"https://ror.org/03h1hsz49","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210119419"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"A. Malik","raw_affiliation_strings":["Institute of Information Theory and Automation, Academy of Sciences\uc2a0of the\uc2a0Czech Republic, Prague, Czech Republic","Inst. of Inf. Theor. & Autom., Acad. of Sci. of the Czech Republic, Prague, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Institute of Information Theory and Automation, Academy of Sciences\uc2a0of the\uc2a0Czech Republic, Prague, Czech Republic","institution_ids":["https://openalex.org/I4210119419"]},{"raw_affiliation_string":"Inst. of Inf. Theor. & Autom., Acad. of Sci. of the Czech Republic, Prague, Czech Republic","institution_ids":["https://openalex.org/I4210119419"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5097581813"],"corresponding_institution_ids":["https://openalex.org/I4210119419"],"apc_list":null,"apc_paid":null,"fwci":2.4475,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.88577435,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"5","issue":null,"first_page":"3272","last_page":"3277"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7322686910629272},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7049845457077026},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6582364439964294},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6219320297241211},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.5893768668174744},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5759775042533875},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5430895090103149},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4956444501876831},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4907456636428833},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4863584637641907},{"id":"https://openalex.org/keywords/statistical-classification","display_name":"Statistical classification","score":0.47470608353614807},{"id":"https://openalex.org/keywords/linear-classifier","display_name":"Linear classifier","score":0.4607445001602173},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.44782769680023193},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4322349429130554},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4197009205818176},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38161131739616394}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7322686910629272},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7049845457077026},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6582364439964294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6219320297241211},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.5893768668174744},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5759775042533875},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5430895090103149},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4956444501876831},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4907456636428833},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4863584637641907},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.47470608353614807},{"id":"https://openalex.org/C139532973","wikidata":"https://www.wikidata.org/wiki/Q2679259","display_name":"Linear classifier","level":3,"score":0.4607445001602173},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.44782769680023193},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4322349429130554},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4197009205818176},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38161131739616394}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn.2005.1556452","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2005.1556452","pdf_url":null,"source":{"id":"https://openalex.org/S4363609022","display_name":"Proceedings. 2005 IEEE International Joint Conference on Neural Networks, 2005.","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. 2005 IEEE International Joint Conference on Neural Networks, 2005.","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.324.73","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.324.73","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://staff.utia.cas.cz/novovic/files/1483.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321006","display_name":"Grantov\u00e1 Agentura \u010cesk\u00e9 Republiky","ror":"https://ror.org/01pv73b02"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W578816368","https://openalex.org/W1549887922","https://openalex.org/W1550206324","https://openalex.org/W1592212241","https://openalex.org/W1601145406","https://openalex.org/W1898334332","https://openalex.org/W2005422315","https://openalex.org/W2014915963","https://openalex.org/W2029296527","https://openalex.org/W2041837861","https://openalex.org/W2097089247","https://openalex.org/W2103333826","https://openalex.org/W2107835995","https://openalex.org/W2114535528","https://openalex.org/W2118020653","https://openalex.org/W2132549764","https://openalex.org/W2134544888","https://openalex.org/W2149684865","https://openalex.org/W2149772057","https://openalex.org/W2435251607","https://openalex.org/W6632852417","https://openalex.org/W6632865047","https://openalex.org/W6682304300","https://openalex.org/W6717827561"],"related_works":["https://openalex.org/W2100540162","https://openalex.org/W2953034154","https://openalex.org/W1980687006","https://openalex.org/W2950683444","https://openalex.org/W1996483796","https://openalex.org/W2158177428","https://openalex.org/W2998259334","https://openalex.org/W2550798287","https://openalex.org/W1569375842","https://openalex.org/W2544352368"],"abstract_inverted_index":{"A":[0],"major":[1],"characteristic":[2],"of":[3,12,29,47,109],"text":[4,13,30,117],"document":[5],"classification":[6],"problem":[7],"is":[8,61],"extremely":[9],"high":[10],"dimensionality":[11],"data.":[14],"In":[15],"this":[16],"paper,":[17],"we":[18,89],"present":[19,64],"four":[20],"new":[21],"algorithms":[22,114],"for":[23,26],"feature/word":[24],"selection":[25,36,113],"the":[27,48,54,84,91,107,110],"purpose":[28],"classification.":[31,118],"We":[32,63],"use":[33],"sequential":[34],"forward":[35],"methods":[37],"based":[38,71],"on":[39,72,83],"improved":[40],"mutual":[41],"information":[42,55],"criterion":[43],"functions.":[44],"The":[45],"performance":[46],"proposed":[49,111],"evaluation":[50],"functions":[51],"compared":[52],"to":[53],"gain":[56],"which":[57],"evaluate":[58],"features":[59],"individually":[60],"discussed.":[62],"experimental":[65,92,104],"results":[66,93,105],"using":[67],"naive":[68],"Bayes":[69],"classifier":[70],"multinomial":[73],"model,":[74],"linear":[75],"support":[76],"vector":[77],"machine":[78],"and":[79,100],"k-nearest":[80],"neighbor":[81],"classifiers":[82],"Reuters":[85],"data":[86],"set.":[87],"Finally,":[88],"analyze":[90],"from":[94],"various":[95],"perspectives,":[96],"including":[97],"precision,":[98],"recall":[99],"F/sub":[101],"1/-measure.":[102],"Preliminary":[103],"indicate":[106],"effectiveness":[108],"feature":[112],"in":[115],"a":[116]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
