{"id":"https://openalex.org/W2070382004","doi":"https://doi.org/10.1145/2557977.2558041","title":"Improved categorical distribution difference feature selection for Chinese document categorization","display_name":"Improved categorical distribution difference feature selection for Chinese document categorization","publication_year":2014,"publication_date":"2014-01-09","ids":{"openalex":"https://openalex.org/W2070382004","doi":"https://doi.org/10.1145/2557977.2558041","mag":"2070382004"},"language":"en","primary_location":{"id":"doi:10.1145/2557977.2558041","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2557977.2558041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th International Conference on Ubiquitous Information Management and Communication","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101821064","display_name":"Qiang Li","orcid":"https://orcid.org/0000-0002-2477-4962"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiang Li","raw_affiliation_strings":["East China Normal University, Shanghai","East China Normal University (Shanghai)"],"affiliations":[{"raw_affiliation_string":"East China Normal University, Shanghai","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"East China Normal University (Shanghai)","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010540039","display_name":"Liang He","orcid":"https://orcid.org/0000-0002-4723-5486"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang He","raw_affiliation_strings":["East China Normal University, Shanghai","East China Normal University (Shanghai)"],"affiliations":[{"raw_affiliation_string":"East China Normal University, Shanghai","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"East China Normal University (Shanghai)","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100398281","display_name":"Xin Lin","orcid":"https://orcid.org/0000-0001-6913-4654"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Lin","raw_affiliation_strings":["East China Normal University, Shanghai","East China Normal University (Shanghai)"],"affiliations":[{"raw_affiliation_string":"East China Normal University, Shanghai","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"East China Normal University (Shanghai)","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101821064"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":0.409,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74491553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13734","display_name":"Advanced Computational Techniques and Applications","score":0.9757999777793884,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9567000269889832,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.8609476089477539},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.8314489126205444},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7243497371673584},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.676355242729187},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6512041091918945},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.6450074315071106},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.6448807716369629},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6281764507293701},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5866361856460571},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4778968095779419},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46890366077423096},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4001016318798065},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3527287244796753},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3368798494338989}],"concepts":[{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.8609476089477539},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.8314489126205444},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7243497371673584},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.676355242729187},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6512041091918945},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.6450074315071106},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.6448807716369629},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6281764507293701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5866361856460571},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4778968095779419},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46890366077423096},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4001016318798065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3527287244796753},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3368798494338989},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2557977.2558041","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2557977.2558041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th International Conference on Ubiquitous Information Management and Communication","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5351613714","display_name":null,"funder_award_id":"11530700300","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"}],"funders":[{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1964438691","https://openalex.org/W1978821416","https://openalex.org/W2056258790","https://openalex.org/W2134090438","https://openalex.org/W2136114015","https://openalex.org/W2161089438","https://openalex.org/W2361483642","https://openalex.org/W2372424341","https://openalex.org/W2435251607","https://openalex.org/W2545861285"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2390857744","https://openalex.org/W2133651098","https://openalex.org/W2390698788","https://openalex.org/W2383063829","https://openalex.org/W2138922887","https://openalex.org/W2082678934","https://openalex.org/W2111353337","https://openalex.org/W2371357422","https://openalex.org/W2035261173"],"abstract_inverted_index":{"Feature":[0],"selection":[1,127],"is":[2],"an":[3],"important":[4],"process":[5],"to":[6,13,44],"choose":[7],"a":[8,14,37],"subset":[9],"of":[10,57,61,69,79,92,123],"features":[11,76],"relevant":[12],"particular":[15],"application":[16],"in":[17,54,77,129],"document":[18,25,94,131],"classification.":[19],"Firstly,":[20],"based":[21],"on":[22],"the":[23,46,55,74,89,93,99,102,121,124],"categorical":[24],"frequency":[26,85],"probability":[27],"(CDFP),":[28],"CDFP_VM":[29,47],"criterion":[30,48],"was":[31,42],"designed":[32],"for":[33,64],"feature":[34,126],"selection.":[35],"Secondly,":[36],"maximum":[38],"conditional":[39],"distribution":[40],"factor":[41],"proposed":[43,125],"improve":[45],"further.":[49],"The":[50,117],"method":[51,128],"has":[52],"advantages":[53],"case":[56],"choosing":[58],"smaller":[59],"number":[60,68],"features,":[62],"especially":[63],"classes":[65],"with":[66,101],"small":[67],"training":[70],"documents.":[71],"It":[72],"keeps":[73],"best":[75],"favor":[78],"neither":[80],"high":[81],"nor":[82],"low":[83],"DF":[84],"terms,":[86],"thus":[87],"improves":[88],"final":[90],"performance":[91],"categorization":[95],"system.":[96],"We":[97],"perform":[98],"experiments":[100],"standard":[103],"Fudan":[104],"Chinese":[105,130],"corpus":[106,110,115],"and":[107,113],"selected":[108],"Sogou":[109],"as":[111],"balanced":[112],"unbalanced":[114],"respectively.":[116],"experiment":[118],"results":[119],"demonstrate":[120],"effectiveness":[122],"categorization.":[132]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
