{"id":"https://openalex.org/W4240055601","doi":"https://doi.org/10.1080/713827172","title":"Pre-processing of high-dimensional categorical predictors in classification settings","display_name":"Pre-processing of high-dimensional categorical predictors in classification settings","publication_year":2003,"publication_date":"2003-05-01","ids":{"openalex":"https://openalex.org/W4240055601","doi":"https://doi.org/10.1080/713827172"},"language":"en","primary_location":{"id":"doi:10.1080/713827172","is_oa":false,"landing_page_url":"https://doi.org/10.1080/713827172","pdf_url":null,"source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057019721","display_name":"Eugene Tuv","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eugene Tuv","raw_affiliation_strings":["Analysis Control Technology , Intel Corporation , Chandler, AZ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Analysis Control Technology , Intel Corporation , Chandler, AZ, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002544391","display_name":"George C. Runger","orcid":"https://orcid.org/0000-0001-9460-6983"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Runger","raw_affiliation_strings":["Department of Industrial Engineering , Arizona State University , Tempe, AZ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering , Arizona State University , Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2195,"currency":"USD","value_usd":2195},"apc_paid":null,"fwci":0.5391,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71753643,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"17","issue":"5-6","first_page":"419","last_page":"429"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.8877999782562256,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.8877999782562256,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.8751000165939331,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.8223999738693237,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.8998411893844604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8338010311126709},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5862686634063721},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5402323007583618},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5109335780143738},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.4877189099788666},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.48207002878189087},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4765610992908478},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4585876166820526},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4510200023651123},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11903220415115356}],"concepts":[{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.8998411893844604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8338010311126709},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5862686634063721},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5402323007583618},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5109335780143738},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.4877189099788666},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.48207002878189087},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4765610992908478},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4585876166820526},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4510200023651123},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11903220415115356},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/713827172","is_oa":false,"landing_page_url":"https://doi.org/10.1080/713827172","pdf_url":null,"source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4378651134","https://openalex.org/W4386799044","https://openalex.org/W2773208253","https://openalex.org/W4252684102","https://openalex.org/W2560646951","https://openalex.org/W2352307597","https://openalex.org/W1979633005","https://openalex.org/W4297454206","https://openalex.org/W65104662","https://openalex.org/W1871748041"],"abstract_inverted_index":{"Models":[0],"in":[1,25,66,123,129],"industrial":[2],"applications":[3],"can":[4,76],"encounter":[5],"categorical":[6,69],"predictors":[7],"with":[8],"a":[9,31,50,99],"large":[10],"number":[11,52],"of":[12,23,53,56,68,108,127],"categories":[13],"(hundreds":[14],"or":[15],"thousands).":[16],"An":[17],"example":[18],"is":[19,42],"the":[20,116],"lot":[21],"identifier":[22],"product":[24],"semiconductor":[26],"manufacturing.":[27],"Such":[28,71],"variables":[29,60],"represent":[30],"serious":[32],"problem":[33],"for":[34,58],"practically":[35],"all":[36],"modern":[37],"classification":[38],"techniques.":[39],"The":[40,95],"goal":[41],"an":[43,81],"efficient,":[44],"computationally":[45],"fast":[46],"way":[47],"to":[48,83,115,131],"discover":[49],"small":[51],"natural":[54],"partitions":[55,72],"values":[57],"such":[59,87],"that":[61],"have":[62],"similar":[63],"statistical":[64],"properties":[65],"terms":[67],"response.":[70],"(interesting":[73],"by":[74],"itself)":[75],"be":[77],"used":[78,133],"then":[79],"as":[80,88],"input":[82],"standard":[84],"learning":[85],"algorithms,":[86],"decision":[89],"trees,":[90],"support":[91],"vector":[92],"machines,":[93],"etc.":[94],"proposed":[96],"approach":[97],"introduces":[98],"data":[100],"transformation":[101],"on":[102],"derived":[103],"sparse":[104],"frequency":[105],"tables.":[106],"Application":[107],"even":[109],"simplest":[110],"non-hierarchical":[111],"metric":[112],"clustering":[113],"method":[114],"transformed":[117],"coordinates":[118],"shows":[119],"significant":[120],"improvement":[121],"both":[122],"speed":[124],"and":[125],"quality":[126],"partition":[128],"comparison":[130],"currently":[132],"methods.":[134]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
