{"id":"https://openalex.org/W2202916482","doi":"https://doi.org/10.1109/bigdata.2015.7363774","title":"A scalable implementation of information theoretic feature selection for high dimensional data","display_name":"A scalable implementation of information theoretic feature selection for high dimensional data","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2202916482","doi":"https://doi.org/10.1109/bigdata.2015.7363774","mag":"2202916482"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7363774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029918720","display_name":"Anthony Kleerekoper","orcid":"https://orcid.org/0000-0002-3621-8568"},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"education","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anthony Kleerekoper","raw_affiliation_strings":["School of Computing, Mathematics and Digital Technologies, Manchester Metropolitan University, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Mathematics and Digital Technologies, Manchester Metropolitan University, UK","institution_ids":["https://openalex.org/I11983389"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113706359","display_name":"Michael Pappas","orcid":null},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Michael Pappas","raw_affiliation_strings":["School of Computer Science, The University of Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008758988","display_name":"Adam Pocock","orcid":"https://orcid.org/0000-0003-2449-0844"},"institutions":[{"id":"https://openalex.org/I1342911587","display_name":"Oracle (United States)","ror":"https://ror.org/006c77m33","country_code":"US","type":"company","lineage":["https://openalex.org/I1342911587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Pocock","raw_affiliation_strings":["Oracle Labs, Burlington, MA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oracle Labs, Burlington, MA","institution_ids":["https://openalex.org/I1342911587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091107483","display_name":"Gavin Brown","orcid":"https://orcid.org/0000-0003-2261-9018"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gavin Brown","raw_affiliation_strings":["School of Computer Science, The University of Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026069638","display_name":"Mikel Luj\u00e1n","orcid":"https://orcid.org/0000-0002-0842-1083"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mikel Lujan","raw_affiliation_strings":["School of Computer Science, The University of Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3355,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.86621124,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"339","last_page":"346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8332574367523193},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7468940615653992},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5630938410758972},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.562641441822052},{"id":"https://openalex.org/keywords/mast","display_name":"Mast (botany)","score":0.5406638383865356},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5397696495056152},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4910874366760254},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.4569258689880371},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.4504852294921875},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44113805890083313},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.43108686804771423},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.41039925813674927},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3268442153930664},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.26941701769828796},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24472647905349731},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1729327142238617},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10591825842857361}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8332574367523193},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7468940615653992},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5630938410758972},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.562641441822052},{"id":"https://openalex.org/C2779655021","wikidata":"https://www.wikidata.org/wiki/Q1907811","display_name":"Mast (botany)","level":3,"score":0.5406638383865356},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5397696495056152},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4910874366760254},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.4569258689880371},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.4504852294921875},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44113805890083313},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.43108686804771423},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.41039925813674927},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3268442153930664},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26941701769828796},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24472647905349731},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1729327142238617},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10591825842857361},{"id":"https://openalex.org/C203014093","wikidata":"https://www.wikidata.org/wiki/Q101929","display_name":"Immunology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2779726688","wikidata":"https://www.wikidata.org/wiki/Q191989","display_name":"Mast cell","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/bigdata.2015.7363774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:e-space.mmu.ac.uk:615528","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/b1bbd1ba-562b-4f5c-830b-ea3236947830","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/b1bbd1ba-562b-4f5c-830b-ea3236947830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kleerekoper, A, Pappas, M, Pocock, A, Brown, G & Lujan Moreno, M L 2015, A Scalable Implementation of Information Theoretic Feature Selection for High Dimensional Data. in IEEE Conference on Big Data. https://doi.org/10.1109/BigData.2015.7363774","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:publications/b1bbd1ba-562b-4f5c-830b-ea3236947830","is_oa":false,"landing_page_url":"https://www.research.manchester.ac.uk/portal/en/publications/a-scalable-implementation-of-information-theoretic-feature-selection-for-high-dimensional-data(b1bbd1ba-562b-4f5c-830b-ea3236947830).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kleerekoper, A, Pappas, M, Pocock, A, Brown, G & Lujan Moreno, M L 2015, A Scalable Implementation of Information Theoretic Feature Selection for High Dimensional Data. in IEEE Conference on Big Data. https://doi.org/10.1109/BigData.2015.7363774","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4917891107","display_name":null,"funder_award_id":"EP/L000725/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6368762450","display_name":null,"funder_award_id":"EP/K008730/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W71506233","https://openalex.org/W410850256","https://openalex.org/W603723648","https://openalex.org/W1508049214","https://openalex.org/W1553244859","https://openalex.org/W1730693163","https://openalex.org/W1999129135","https://openalex.org/W2032726409","https://openalex.org/W2056168656","https://openalex.org/W2114060717","https://openalex.org/W2115247131","https://openalex.org/W2131101925","https://openalex.org/W2133990480","https://openalex.org/W2149454242","https://openalex.org/W2154053567","https://openalex.org/W2156504490","https://openalex.org/W2170833472","https://openalex.org/W2461884057","https://openalex.org/W2797602122","https://openalex.org/W2963288913","https://openalex.org/W2997674406","https://openalex.org/W2998216295","https://openalex.org/W3141603496","https://openalex.org/W4243863038","https://openalex.org/W4285719527","https://openalex.org/W6614148910","https://openalex.org/W6682496738","https://openalex.org/W6682686508","https://openalex.org/W7066667914"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W3092506759","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W120741642","https://openalex.org/W138569904","https://openalex.org/W2390914021","https://openalex.org/W2389417819"],"abstract_inverted_index":{"With":[0],"the":[1,28,45,73,77,102,112,116,143,170,217,224],"growth":[2],"of":[3,13,31,47,89,97,118,129,142,159,172,193,223,226,235],"high":[4],"dimensional":[5],"data,":[6],"feature":[7],"selection":[8,51,65],"is":[9,204],"a":[10,95,157,209,233,247],"vital":[11],"component":[12],"machine":[14],"learning":[15],"as":[16,18],"well":[17],"an":[19,83,122,127,221,255],"important":[20],"stand":[21],"alone":[22],"data":[23,33,62,98,113],"analytics":[24,34],"tool.":[25],"Without":[26],"it,":[27],"computation":[29],"cost":[30],"big":[32],"can":[35,43],"become":[36],"unmanageable":[37],"and":[38,41,54,76,86,136,186,239],"spurious":[39],"correlations":[40],"noise":[42],"reduce":[44,115],"accuracy":[46],"any":[48],"results.":[49],"Feature":[50,64],"removes":[52],"irrelevant":[53],"redundant":[55],"information":[56,69],"leading":[57],"to":[58,114,140,150,166,231],"faster,":[59],"more":[60],"reliable":[61],"analysis.":[63],"techniques":[66,162],"based":[67],"on":[68,175,216,246],"theory":[70],"are":[71],"among":[72],"fastest":[74],"known":[75],"Manchester":[78],"AnalyticS":[79],"Toolkit":[80],"(MAST)":[81],"provides":[82],"efficient,":[84],"parallel":[85,160],"scalable":[87],"implementation":[88],"these":[90],"methods.":[91],"This":[92],"paper":[93],"considers":[94],"number":[96,117,158,171],"structures":[99,145],"for":[100,213],"storing":[101],"frequency":[103],"counters":[104,120],"that":[105,110,146,163,189,202],"underpin":[106],"MAST.":[107],"We":[108,154],"show":[109,188,201],"preprocessing":[111],"zero-valued":[119,152],"in":[121,126,132,242],"array":[123],"structure":[124],"results":[125],"order":[128],"magnitude":[130,194],"reduction":[131],"both":[133],"memory":[134],"usage":[135],"execution":[137],"time":[138],"compared":[139],"state":[141],"art":[144],"use":[147],"explicit":[148],"mappings":[149],"avoid":[151],"counters.":[153],"also":[155],"describe":[156],"processing":[161],"enable":[164],"MAST":[165,178,203],"scale":[167],"linearly":[168],"with":[169],"processors":[173],"even":[174],"NUMA":[176],"architectures.":[177],"targets":[179],"scale-up":[180],"servers":[181],"rather":[182],"than":[183,196,208],"scale-out":[184,210],"clusters":[185],"we":[187,200,228],"it":[190],"performs":[191],"orders":[192],"faster":[195,207],"existing":[197],"tools.":[198],"Moreover,":[199],"3.5":[205],"times":[206],"solution":[211],"built":[212],"Spark":[214],"running":[215],"same":[218],"server.":[219],"As":[220],"example":[222],"performance":[225],"MAST,":[227],"were":[229],"able":[230],"process":[232],"dataset":[234],"100":[236],"million":[237],"examples":[238],"100,000":[240],"features":[241],"under":[243],"10":[244],"minutes":[245],"four":[248],"socket":[249,253],"server":[250],"which":[251],"each":[252],"containing":[254],"8-core":[256],"Intel":[257],"Xeon":[258],"E5-4620":[259],"processor.":[260]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
