{"id":"https://openalex.org/W2263270445","doi":"https://doi.org/10.1109/tkde.2016.2563436","title":"Toward Optimal Feature Selection in Naive Bayes for Text Categorization","display_name":"Toward Optimal Feature Selection in Naive Bayes for Text Categorization","publication_year":2016,"publication_date":"2016-05-05","ids":{"openalex":"https://openalex.org/W2263270445","doi":"https://doi.org/10.1109/tkde.2016.2563436","mag":"2263270445"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2016.2563436","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2563436","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1602.02850","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074257864","display_name":"Bo Tang","orcid":"https://orcid.org/0000-0001-5708-766X"},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bo Tang","raw_affiliation_strings":["Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019494783","display_name":"Steven Kay","orcid":"https://orcid.org/0000-0002-9267-8040"},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven Kay","raw_affiliation_strings":["Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100411297","display_name":"Haibo He","orcid":"https://orcid.org/0000-0002-5247-9370"},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haibo He","raw_affiliation_strings":["Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer, and Biomedical Engineering, University of Rhode Island, Kingston, RI","institution_ids":["https://openalex.org/I17626003"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5074257864"],"corresponding_institution_ids":["https://openalex.org/I17626003"],"apc_list":null,"apc_paid":null,"fwci":44.784,"has_fulltext":false,"cited_by_count":252,"citation_normalized_percentile":{"value":0.99793578,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"28","issue":"9","first_page":"2508","last_page":"2521"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7933287024497986},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7478421926498413},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7355837821960449},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6763702630996704},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6419797539710999},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5941764712333679},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.573989987373352},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.529085636138916},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5045279264450073},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4035260081291199},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3778959810733795},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37592414021492004},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.2540496587753296},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.1866404116153717}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7933287024497986},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7478421926498413},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7355837821960449},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6763702630996704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6419797539710999},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5941764712333679},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.573989987373352},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.529085636138916},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5045279264450073},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4035260081291199},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3778959810733795},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37592414021492004},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.2540496587753296},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.1866404116153717},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tkde.2016.2563436","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2563436","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1602.02850","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1602.02850","pdf_url":"https://arxiv.org/pdf/1602.02850","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:digitalcommons.uri.edu:ele_facpubs-1471","is_oa":true,"landing_page_url":"https://digitalcommons.uri.edu/ele_facpubs/472","pdf_url":null,"source":{"id":"https://openalex.org/S2764761010","display_name":"Journal of Media Literacy Education","issn_l":"2167-8715","issn":["2167-8715"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310316378","host_organization_name":"National Association for Media Literacy Education","host_organization_lineage":["https://openalex.org/P4310316378"],"host_organization_lineage_names":["National Association for Media Literacy Education"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Electrical, Computer, and Biomedical Engineering Faculty Publications","raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1602.02850","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1602.02850","pdf_url":"https://arxiv.org/pdf/1602.02850","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G1992257409","display_name":null,"funder_award_id":"W911NF-12-1-0378","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W16136051","https://openalex.org/W67881473","https://openalex.org/W107306860","https://openalex.org/W1500568346","https://openalex.org/W1523389133","https://openalex.org/W1530699444","https://openalex.org/W1549887922","https://openalex.org/W1550206324","https://openalex.org/W1556911442","https://openalex.org/W1576676390","https://openalex.org/W1597870463","https://openalex.org/W1614298861","https://openalex.org/W1620204465","https://openalex.org/W1924689489","https://openalex.org/W1962363309","https://openalex.org/W1967203824","https://openalex.org/W1978394996","https://openalex.org/W2006681603","https://openalex.org/W2017337590","https://openalex.org/W2033068437","https://openalex.org/W2042587503","https://openalex.org/W2056519815","https://openalex.org/W2071415686","https://openalex.org/W2087609354","https://openalex.org/W2095489189","https://openalex.org/W2102831150","https://openalex.org/W2103333826","https://openalex.org/W2118020653","https://openalex.org/W2118978333","https://openalex.org/W2123838014","https://openalex.org/W2129877315","https://openalex.org/W2131462252","https://openalex.org/W2131744502","https://openalex.org/W2133864802","https://openalex.org/W2135813353","https://openalex.org/W2142515059","https://openalex.org/W2146950091","https://openalex.org/W2147735646","https://openalex.org/W2149684865","https://openalex.org/W2158139315","https://openalex.org/W2160536005","https://openalex.org/W2165612380","https://openalex.org/W2166183437","https://openalex.org/W2168084958","https://openalex.org/W2169658215","https://openalex.org/W2249181615","https://openalex.org/W2343999414","https://openalex.org/W2348210445","https://openalex.org/W2435251607","https://openalex.org/W2949547296","https://openalex.org/W2950577311","https://openalex.org/W3014813097","https://openalex.org/W3105045783","https://openalex.org/W4300906944","https://openalex.org/W4302591315","https://openalex.org/W6602738186","https://openalex.org/W6604296810","https://openalex.org/W6632852417","https://openalex.org/W6632865047","https://openalex.org/W6634786571","https://openalex.org/W6636378376","https://openalex.org/W6636510571","https://openalex.org/W6675969814","https://openalex.org/W6679224782","https://openalex.org/W6679775712","https://openalex.org/W6680007464","https://openalex.org/W6683557909","https://openalex.org/W6683971644","https://openalex.org/W6717827561"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2390857744","https://openalex.org/W2390698788","https://openalex.org/W2133651098","https://openalex.org/W2078570174","https://openalex.org/W2383063829","https://openalex.org/W2138922887","https://openalex.org/W2111353337","https://openalex.org/W2371357422","https://openalex.org/W2035261173"],"abstract_inverted_index":{"Automated":[0],"feature":[1,10,29,107],"selection":[2,30,108],"is":[3],"important":[4],"for":[5,47,60,96,120],"text":[6,121],"categorization":[7],"to":[8,13,39,70,92],"reduce":[9],"size":[11],"and":[12,27,57,64,73,118],"speed":[14],"up":[15],"learning":[16],"process":[17],"of":[18,77,126,132],"classifiers.":[19],"In":[20],"this":[21],"paper,":[22],"we":[23,103],"present":[24],"a":[25,78,84],"novel":[26],"efficient":[28,106],"framework":[31],"based":[32],"on":[33,100],"the":[34,41,101,130,133],"Information":[35],"Theory,":[36],"which":[37],"aims":[38],"rank":[40],"features":[42],"with":[43],"their":[44,66],"discriminative":[45],"capacity":[46],"classification.":[48,98],"We":[49,81],"first":[50],"revisit":[51],"two":[52,105],"information":[53],"measures:":[54],"Kullback-Leibler":[55],"divergence":[56,59,86,95],"Jeffreys":[58],"binary":[61],"hypothesis":[62],"testing,":[63],"analyze":[65],"asymptotic":[67],"properties":[68],"relating":[69],"type":[71,74],"I":[72],"II":[75],"errors":[76],"Bayesian":[79],"classifier.":[80],"then":[82],"introduce":[83],"new":[85],"measure,":[87],"called":[88],"Jeffreys-Multi-Hypothesis":[89],"(JMH)":[90],"divergence,":[91],"measure":[93],"multi-distribution":[94],"multi-class":[97],"Based":[99],"JMH-divergence,":[102],"develop":[104],"methods,":[109,119],"termed":[110],"maximum":[111],"discrimination":[112],"(":[113],"<inline-formula><tex-math":[114],"notation=\"LaTeX\">$MD$</tex-math>":[115],"</inline-formula>":[116],")":[117],"categorization.":[122],"The":[123],"promising":[124],"results":[125],"extensive":[127],"experiments":[128],"demonstrate":[129],"effectiveness":[131],"proposed":[134],"approaches.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":28},{"year":2021,"cited_by_count":39},{"year":2020,"cited_by_count":44},{"year":2019,"cited_by_count":35},{"year":2018,"cited_by_count":35},{"year":2017,"cited_by_count":22},{"year":2016,"cited_by_count":9}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
