{"id":"https://openalex.org/W2423993090","doi":"https://doi.org/10.1109/icde.2016.7498272","title":"Input selection for fast feature engineering","display_name":"Input selection for fast feature engineering","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2423993090","doi":"https://doi.org/10.1109/icde.2016.7498272","mag":"2423993090"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2016.7498272","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498272","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101897635","display_name":"Michael R. Anderson","orcid":"https://orcid.org/0000-0003-1791-2770"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael R. Anderson","raw_affiliation_strings":["University of Michigan, Ann Arbor, 48109, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, 48109, United States of America","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039133265","display_name":"Michael Cafarella","orcid":"https://orcid.org/0000-0001-6122-0590"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Cafarella","raw_affiliation_strings":["University of Michigan, Ann Arbor, 48109, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, 48109, United States of America","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101897635"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":10.6647,"has_fulltext":false,"cited_by_count":51,"citation_normalized_percentile":{"value":0.98231372,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"577","last_page":"588"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.702141284942627},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6030144095420837},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5337371230125427},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48254889249801636},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42969179153442383},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3359088599681854}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.702141284942627},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6030144095420837},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5337371230125427},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48254889249801636},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42969179153442383},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3359088599681854},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icde.2016.7498272","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498272","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1435924991","https://openalex.org/W1507957806","https://openalex.org/W1513874326","https://openalex.org/W1522930108","https://openalex.org/W1570448133","https://openalex.org/W1586825695","https://openalex.org/W1986790453","https://openalex.org/W2027720485","https://openalex.org/W2037701287","https://openalex.org/W2039643849","https://openalex.org/W2049934117","https://openalex.org/W2060791232","https://openalex.org/W2065383075","https://openalex.org/W2071989194","https://openalex.org/W2074935284","https://openalex.org/W2096765155","https://openalex.org/W2099102906","https://openalex.org/W2101234009","https://openalex.org/W2103224511","https://openalex.org/W2103753221","https://openalex.org/W2107598941","https://openalex.org/W2112841646","https://openalex.org/W2118858186","https://openalex.org/W2118978333","https://openalex.org/W2120480077","https://openalex.org/W2132176208","https://openalex.org/W2133990480","https://openalex.org/W2148317584","https://openalex.org/W2155070484","https://openalex.org/W2155398148","https://openalex.org/W2157665255","https://openalex.org/W2161336914","https://openalex.org/W2165742564","https://openalex.org/W2168020168","https://openalex.org/W2168405694","https://openalex.org/W2170584976","https://openalex.org/W2170616854","https://openalex.org/W2173213060","https://openalex.org/W2184623761","https://openalex.org/W2189465200","https://openalex.org/W2253807446","https://openalex.org/W2296677182","https://openalex.org/W2394554088","https://openalex.org/W2406996511","https://openalex.org/W2903158431","https://openalex.org/W2950929549","https://openalex.org/W2964244261","https://openalex.org/W3007818867","https://openalex.org/W4230183041","https://openalex.org/W4251617391","https://openalex.org/W6631307971","https://openalex.org/W6635099281","https://openalex.org/W6675354045","https://openalex.org/W6677919164","https://openalex.org/W6678242812","https://openalex.org/W6682590970","https://openalex.org/W6683235873","https://openalex.org/W6686239164","https://openalex.org/W6687322159","https://openalex.org/W6713468891","https://openalex.org/W6756615331"],"related_works":["https://openalex.org/W4205762803","https://openalex.org/W2535856026","https://openalex.org/W2265065644","https://openalex.org/W2134699697","https://openalex.org/W2322875716","https://openalex.org/W2977677679","https://openalex.org/W2033914206","https://openalex.org/W2042327336","https://openalex.org/W4386564352","https://openalex.org/W2952668426"],"abstract_inverted_index":{"The":[0],"application":[1],"of":[2,13,50,62,69,156,167],"machine":[3,86],"learning":[4,30,87],"to":[5,169,181],"large":[6],"datasets":[7],"has":[8],"become":[9],"a":[10,47,85,89,104,124,133,141],"vital":[11],"component":[12],"many":[14,108],"important":[15],"and":[16,79,174],"sophisticated":[17],"software":[18],"systems":[19,24],"built":[20],"today.":[21],"Such":[22],"trained":[23,54],"are":[25,97,116],"often":[26,103],"based":[27],"on":[28,59],"supervised":[29],"tasks":[31],"that":[32,40,72,144],"require":[33],"features,":[34],"signals":[35],"extracted":[36],"from":[37,179],"the":[38,60,114,153,157],"data":[39,44,75,126],"distill":[41],"complicated":[42],"raw":[43,74],"objects":[45,76],"into":[46],"small":[48],"number":[49],"salient":[51],"values.":[52],"A":[53],"system's":[55],"success":[56],"depends":[57],"substantially":[58],"quality":[61],"its":[63],"features.":[64],"Unfortunately,":[65],"feature":[66,81,100,146,158,164],"engineering\u2014the":[67],"process":[68,106],"writing":[70],"code":[71,111,120],"takes":[73],"as":[77],"input":[78,150],"outputs":[80],"vectors":[82],"suitable":[83],"for":[84,136],"algorithm\u2014is":[88],"tedious,":[90],"time-consuming":[91,125],"experience.":[92],"Because":[93,113],"\u201cbig":[94],"data\u201d":[95],"inputs":[96,115],"so":[98,117],"diverse,":[99],"engineering":[101,147,159],"is":[102],"trial-and-error":[105],"requiring":[107],"small,":[109],"iterative":[110],"changes.":[112],"large,":[118],"each":[119,130],"change":[121],"can":[122],"involve":[123],"processing":[127],"task":[128],"(over":[129],"page":[131],"in":[132,171,184],"Web":[134],"crawl,":[135],"example).":[137],"We":[138],"introduce":[139],"Zombie,":[140],"data-centric":[142],"system":[143,162],"accelerates":[145],"through":[148],"intelligent":[149],"selection,":[151],"optimizing":[152],"\u201cinner":[154],"loop\u201d":[155],"process.":[160],"Our":[161],"yields":[163],"evaluation":[165],"speedups":[166],"up":[168],"8\u00d7":[170],"some":[172],"cases":[173],"reduces":[175],"engineer":[176],"wait":[177],"times":[178],"8":[180],"5":[182],"hours":[183],"others.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
