{"id":"https://openalex.org/W2980281243","doi":"https://doi.org/10.1109/jsait.2020.2981538","title":"An Information-Theoretic Approach to Unsupervised Feature Selection for High-Dimensional Data","display_name":"An Information-Theoretic Approach to Unsupervised Feature Selection for High-Dimensional Data","publication_year":2020,"publication_date":"2020-03-17","ids":{"openalex":"https://openalex.org/W2980281243","doi":"https://doi.org/10.1109/jsait.2020.2981538","mag":"2980281243"},"language":"en","primary_location":{"id":"doi:10.1109/jsait.2020.2981538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2020.2981538","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1910.03196","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shao-Lun Huang","orcid":"https://orcid.org/0000-0003-2827-4022"},"institutions":[{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shao-Lun Huang","raw_affiliation_strings":["Data Science and Information Technology Research Center, Tsinghua\u2013Berkeley Shenzhen Institute, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Data Science and Information Technology Research Center, Tsinghua\u2013Berkeley Shenzhen Institute, Shenzhen, China","institution_ids":["https://openalex.org/I4210114105"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiangxiang Xu","orcid":"https://orcid.org/0000-0002-4178-0934"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangxiang Xu","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":null,"display_name":"Lizhong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lizhong Zheng","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, Massachusetts Institute of Technology, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, Massachusetts Institute of Technology, Cambridge, USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210114105"],"apc_list":null,"apc_paid":null,"fwci":0.4907,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.64417402,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"1","issue":"1","first_page":"157","last_page":"166"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.21850000321865082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.21850000321865082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11716","display_name":"Random Matrices and Applications","score":0.08009999990463257,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.065700002014637,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7027000188827515},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.4993000030517578},{"id":"https://openalex.org/keywords/functional-principal-component-analysis","display_name":"Functional principal component analysis","score":0.4830000102519989},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.47760000824928284},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46380001306533813},{"id":"https://openalex.org/keywords/random-variable","display_name":"Random variable","score":0.4560000002384186},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4512999951839447},{"id":"https://openalex.org/keywords/joint-probability-distribution","display_name":"Joint probability distribution","score":0.4359000027179718},{"id":"https://openalex.org/keywords/conditional-probability-distribution","display_name":"Conditional probability distribution","score":0.40950000286102295},{"id":"https://openalex.org/keywords/information-theory","display_name":"Information theory","score":0.39989998936653137}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7027000188827515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5967000126838684},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.4993000030517578},{"id":"https://openalex.org/C71176878","wikidata":"https://www.wikidata.org/wiki/Q17014987","display_name":"Functional principal component analysis","level":3,"score":0.4830000102519989},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.47760000824928284},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.46380001306533813},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46380001306533813},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4634999930858612},{"id":"https://openalex.org/C122123141","wikidata":"https://www.wikidata.org/wiki/Q176623","display_name":"Random variable","level":2,"score":0.4560000002384186},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C43555835","wikidata":"https://www.wikidata.org/wiki/Q2300258","display_name":"Conditional probability distribution","level":2,"score":0.40950000286102295},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.39989998936653137},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.3808000087738037},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C55974624","wikidata":"https://www.wikidata.org/wiki/Q1188504","display_name":"Exponential family","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3528999984264374},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31859999895095825},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C165216359","wikidata":"https://www.wikidata.org/wiki/Q670653","display_name":"Marginal distribution","level":3,"score":0.31700000166893005},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C124805900","wikidata":"https://www.wikidata.org/wiki/Q5159269","display_name":"Conditional mutual information","level":3,"score":0.3118000030517578},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3070000112056732},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C38764148","wikidata":"https://www.wikidata.org/wiki/Q17098245","display_name":"Interaction information","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C138405894","wikidata":"https://www.wikidata.org/wiki/Q3179949","display_name":"Multivariate random variable","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C44492722","wikidata":"https://www.wikidata.org/wiki/Q327069","display_name":"Conditional probability","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2732999920845032},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C56672385","wikidata":"https://www.wikidata.org/wiki/Q17157111","display_name":"Mixture distribution","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C141547133","wikidata":"https://www.wikidata.org/wiki/Q7291996","display_name":"Random variate","level":3,"score":0.25600001215934753}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/jsait.2020.2981538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2020.2981538","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1910.03196","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.03196","pdf_url":"https://arxiv.org/pdf/1910.03196","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1910.03196","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.03196","pdf_url":"https://arxiv.org/pdf/1910.03196","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6483256435","display_name":null,"funder_award_id":"61807021","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2004026774","https://openalex.org/W2018582985","https://openalex.org/W2039103602","https://openalex.org/W2044475502","https://openalex.org/W2095439994","https://openalex.org/W2112796928","https://openalex.org/W2118557299","https://openalex.org/W2144012927","https://openalex.org/W2144938562","https://openalex.org/W2235688562","https://openalex.org/W2342676922","https://openalex.org/W2556383977","https://openalex.org/W2583932153","https://openalex.org/W2963767133","https://openalex.org/W2976504920","https://openalex.org/W4229706427","https://openalex.org/W4239510810","https://openalex.org/W4248624814","https://openalex.org/W4312258136","https://openalex.org/W6770506162"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,88,119,151],"propose":[4],"an":[5,74],"information-theoretic":[6],"approach":[7,144,155],"to":[8,13,30,113,129,159],"design":[9,120],"the":[10,15,32,36,46,55,60,79,90,98,103,115,122,131,188],"functional":[11,100,133,175],"representations":[12,101,110,134],"extract":[14],"hidden":[16,47],"common":[17,33,56,116],"structure":[18],"shared":[19],"by":[20,39,73,78,195],"a":[21,139],"set":[22],"of":[23,49,81,102,190],"random":[24,37,51,104],"variables.":[25],"The":[26],"main":[27],"idea":[28],"is":[29,58],"measure":[31],"information":[34,57,182],"between":[35,181],"variables":[38,52],"Watanabe's":[40],"total":[41],"correlation,":[42,167],"and":[43,106,138,173,184],"then":[44],"find":[45],"attributes":[48,69,96],"these":[50,63,68,95],"such":[53,109,162],"that":[54,67,108,153],"reduced":[59],"most":[61],"given":[62],"attributes.":[64],"We":[65],"show":[66,107,152],"can":[70],"be":[71],"characterized":[72],"exponential":[75],"family":[76],"specified":[77],"eigen-decomposition":[80],"some":[82],"pairwise":[83],"joint":[84],"distribution":[85],"matrix.":[86],"Then,":[87],"adopt":[89],"log-likelihood":[91],"functions":[92],"for":[93,135,145],"estimating":[94],"as":[97,163],"desired":[99],"variables,":[105],"are":[111,193],"informative":[112],"describe":[114],"structure.":[117],"Moreover,":[118],"both":[121],"multivariate":[123],"alternating":[124],"conditional":[125],"expectation":[126],"(MACE)":[127],"algorithm":[128],"compute":[130],"proposed":[132],"discrete":[136],"data,":[137],"novel":[140],"neural":[141],"network":[142],"training":[143],"continuous":[146],"or":[147],"high-dimensional":[148],"data.":[149],"Furthermore,":[150],"our":[154,191],"has":[156],"deep":[157],"connections":[158,180],"existing":[160],"techniques,":[161],"Hirschfeld-Gebelein-R\u00e9nyi":[164],"(HGR)":[165],"maximal":[166],"linear":[168],"principal":[169],"component":[170],"analysis":[171],"(PCA),":[172],"consistent":[174],"map,":[176],"which":[177],"establishes":[178],"insightful":[179],"theory":[183],"machine":[185],"learning.":[186],"Finally,":[187],"performances":[189],"algorithms":[192],"validated":[194],"numerical":[196],"simulations.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2019-10-18T00:00:00"}
