{"id":"https://openalex.org/W1969128289","doi":"https://doi.org/10.1145/1982185.1982389","title":"Entropy based feature selection for text categorization","display_name":"Entropy based feature selection for text categorization","publication_year":2011,"publication_date":"2011-03-21","ids":{"openalex":"https://openalex.org/W1969128289","doi":"https://doi.org/10.1145/1982185.1982389","mag":"1969128289"},"language":"en","primary_location":{"id":"doi:10.1145/1982185.1982389","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1982185.1982389","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-00617969","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086633120","display_name":"Christine Largeron","orcid":"https://orcid.org/0000-0003-1059-4095"},"institutions":[{"id":"https://openalex.org/I100532134","display_name":"Universit\u00e9 Claude Bernard Lyon 1","ror":"https://ror.org/029brtt94","country_code":"FR","type":"education","lineage":["https://openalex.org/I100532134","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Christine Largeron","raw_affiliation_strings":["Universit\u00e9 de Lyon, Saint-\u00c9tienne, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Lyon, Saint-\u00c9tienne, France","institution_ids":["https://openalex.org/I100532134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110385687","display_name":"Christophe Moulin","orcid":null},"institutions":[{"id":"https://openalex.org/I100532134","display_name":"Universit\u00e9 Claude Bernard Lyon 1","ror":"https://ror.org/029brtt94","country_code":"FR","type":"education","lineage":["https://openalex.org/I100532134","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Christophe Moulin","raw_affiliation_strings":["Universit\u00e9 de Lyon, Saint-\u00c9tienne, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Lyon, Saint-\u00c9tienne, France","institution_ids":["https://openalex.org/I100532134"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080829901","display_name":"Mathias G\u00e9ry","orcid":"https://orcid.org/0009-0003-6182-5911"},"institutions":[{"id":"https://openalex.org/I100532134","display_name":"Universit\u00e9 Claude Bernard Lyon 1","ror":"https://ror.org/029brtt94","country_code":"FR","type":"education","lineage":["https://openalex.org/I100532134","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mathias G\u00e9ry","raw_affiliation_strings":["Universit\u00e9 de Lyon, Saint-\u00c9tienne, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Lyon, Saint-\u00c9tienne, France","institution_ids":["https://openalex.org/I100532134"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086633120"],"corresponding_institution_ids":["https://openalex.org/I100532134"],"apc_list":null,"apc_paid":null,"fwci":3.9472,"has_fulltext":false,"cited_by_count":83,"citation_normalized_percentile":{"value":0.93517147,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"924","last_page":"928"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7719587087631226},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7403608560562134},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.675987720489502},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.6521208882331848},{"id":"https://openalex.org/keywords/encyclopedia","display_name":"Encyclopedia","score":0.6445738673210144},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.5784809589385986},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.5773040652275085},{"id":"https://openalex.org/keywords/information-gain","display_name":"Information gain","score":0.5481435060501099},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5016398429870605},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.41959917545318604},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39751535654067993},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37292373180389404}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7719587087631226},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7403608560562134},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.675987720489502},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.6521208882331848},{"id":"https://openalex.org/C148863701","wikidata":"https://www.wikidata.org/wiki/Q5292","display_name":"Encyclopedia","level":2,"score":0.6445738673210144},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.5784809589385986},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.5773040652275085},{"id":"https://openalex.org/C2983203078","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Information gain","level":2,"score":0.5481435060501099},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5016398429870605},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.41959917545318604},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39751535654067993},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37292373180389404},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1982185.1982389","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1982185.1982389","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM Symposium on Applied Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.393.5066","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.393.5066","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://hal.inria.fr/docs/00/61/79/69/PDF/sac_2011.pdf","raw_type":"text"},{"id":"pmh:oai:HAL:hal-00617969v1","is_oa":true,"landing_page_url":"https://hal.science/hal-00617969","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Symposium on Applied Computing, Mar 2011, TaiChung, Taiwan. pp.924-928, &#x27E8;10.1145/1982185.1982389&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-00617969v1","is_oa":true,"landing_page_url":"https://hal.science/hal-00617969","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Symposium on Applied Computing, Mar 2011, TaiChung, Taiwan. pp.924-928, &#x27E8;10.1145/1982185.1982389&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.46000000834465027,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1486589216","https://openalex.org/W1523389133","https://openalex.org/W1576676390","https://openalex.org/W1983078185","https://openalex.org/W1995875735","https://openalex.org/W2005422315","https://openalex.org/W2043772506","https://openalex.org/W2045117606","https://openalex.org/W2087609354","https://openalex.org/W2098162425","https://openalex.org/W2103333826","https://openalex.org/W2118020653","https://openalex.org/W2118585731","https://openalex.org/W2135276756","https://openalex.org/W2140190241","https://openalex.org/W2147152072","https://openalex.org/W2149684865","https://openalex.org/W2150102617","https://openalex.org/W2156909104","https://openalex.org/W2165612380","https://openalex.org/W2170654002","https://openalex.org/W2186428165","https://openalex.org/W2435251607","https://openalex.org/W2945850567","https://openalex.org/W2997501009","https://openalex.org/W3001645704","https://openalex.org/W3005526988","https://openalex.org/W3119603699","https://openalex.org/W4290864872"],"related_works":["https://openalex.org/W2386078281","https://openalex.org/W4287553507","https://openalex.org/W3111660818","https://openalex.org/W2367691850","https://openalex.org/W2069321575","https://openalex.org/W1482708182","https://openalex.org/W2328484534","https://openalex.org/W2533731304","https://openalex.org/W3174433205","https://openalex.org/W2241978443"],"abstract_inverted_index":{"In":[0,24],"text":[1],"categorization,":[2],"feature":[3,30,76,114],"selection":[4,31,77,113],"can":[5],"be":[6],"essential":[7],"not":[8],"only":[9],"for":[10,17],"reducing":[11],"the":[12,19,22,41,49,52,55,58,62,110,118],"index":[13],"size":[14],"but":[15,60],"also":[16],"improving":[18],"performance":[20],"of":[21,51,99,112],"classifier.":[23],"this":[25,44,106],"article,":[26],"we":[27],"propose":[28],"a":[29,96],"criterion,":[32],"called":[33],"Entropy":[34],"based":[35,47,79],"Category":[36],"Coverage":[37],"Difference":[38],"(ECCD).":[39],"On":[40],"one":[42],"hand,":[43,64],"criterion":[45],"is":[46],"on":[48,61,80,95],"distribution":[50],"documents":[53,101],"containing":[54],"term":[56],"in":[57],"categories,":[59],"other":[63],"it":[65],"takes":[66],"into":[67],"account":[68],"its":[69],"entropy.":[70],"ECCD":[71],"compares":[72],"favorably":[73],"with":[74],"usual":[75],"methods":[78],"document":[81],"frequency":[82],"(DF),":[83],"information":[84,88],"gain":[85],"(IG),":[86],"mutual":[87],"(IM),":[89],"\u03c72,":[90],"odd":[91],"ratio":[92],"and":[93],"GSS":[94],"large":[97],"collection":[98],"XML":[100],"from":[102,117],"Wikipedia":[103],"encyclopedia.":[104],"Moreover,":[105],"comparative":[107],"study":[108],"confirms":[109],"effectiveness":[111],"techniques":[115],"derived":[116],"\u03c72":[119],"statistics.":[120]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":13},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
