{"id":"https://openalex.org/W3007840160","doi":"https://doi.org/10.1109/bigdata47090.2019.9006522","title":"Extracting Feature Engineering Knowledge from Data Science Notebooks","display_name":"Extracting Feature Engineering Knowledge from Data Science Notebooks","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3007840160","doi":"https://doi.org/10.1109/bigdata47090.2019.9006522","mag":"3007840160"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9006522","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006522","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044722101","display_name":"Masafumi Oyamada","orcid":"https://orcid.org/0000-0002-4045-7350"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Masafumi Oyamada","raw_affiliation_strings":["NEC Corporation, Kawasaki, Japan"],"affiliations":[{"raw_affiliation_string":"NEC Corporation, Kawasaki, Japan","institution_ids":["https://openalex.org/I118347220"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5044722101"],"corresponding_institution_ids":["https://openalex.org/I118347220"],"apc_list":null,"apc_paid":null,"fwci":2.0349,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90465997,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"6172","last_page":"6173"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.751025915145874},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.7220008969306946},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6480679512023926},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5733397006988525},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5376485586166382},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.5032808184623718},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.48933523893356323},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4776417315006256},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.47354286909103394},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.44635826349258423},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.41153624653816223},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39876651763916016},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.16928315162658691},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16694751381874084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.751025915145874},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.7220008969306946},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6480679512023926},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5733397006988525},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5376485586166382},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.5032808184623718},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.48933523893356323},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4776417315006256},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.47354286909103394},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.44635826349258423},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41153624653816223},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39876651763916016},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.16928315162658691},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16694751381874084},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9006522","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006522","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2109943392","https://openalex.org/W2110893883","https://openalex.org/W2136593687","https://openalex.org/W2146606092","https://openalex.org/W2286236884","https://openalex.org/W2784241156","https://openalex.org/W2904530076","https://openalex.org/W2941332886","https://openalex.org/W4301168982","https://openalex.org/W6761479471"],"related_works":["https://openalex.org/W4387478977","https://openalex.org/W2380784125","https://openalex.org/W3004057759","https://openalex.org/W2889085215","https://openalex.org/W2914806737","https://openalex.org/W4287991004","https://openalex.org/W2998216295","https://openalex.org/W33196642","https://openalex.org/W2611743072","https://openalex.org/W4360615883"],"abstract_inverted_index":{"Designing":[0],"good":[1,39,82],"features":[2,40,83,105],"for":[3,126,163],"machine":[4],"learning":[5],"models,":[6],"which":[7,23],"is":[8,11,58,72,94],"called":[9],"feature-engineering,":[10],"one":[12,61],"of":[13,27,35,49,62,114],"the":[14,25,30,36,43,50,53,63,76,104,164],"most":[15,64],"important":[16,178],"tasks":[17],"in":[18,67,97,111],"data":[19,51,68,77,109,133],"analysis.":[20,69],"Well-designed":[21],"features,":[22],"capture":[24],"characteristics":[26],"data,":[28],"improve":[29],"predictive":[31],"performance":[32],"and":[33,52,90,176],"explainability":[34],"model.":[37],"Since":[38,156],"generally":[41],"reflect":[42],"deep":[44],"knowledge":[45,135],"on":[46],"business":[47],"domains":[48],"analysis":[54],"task,":[55],"feature":[56,128,145,179],"engineering":[57,129,146,180],"considered":[59],"as":[60,88,136,138,182],"difficult":[65],"phases":[66],"Nowadays,":[70],"AutoML":[71,93],"trying":[73],"to":[74],"automate":[75],"science":[78],"process":[79],"by":[80,84,107,153,184],"producing":[81],"autonomous":[85],"algorithms":[86],"such":[87],"feature-synthesis":[89],"feature-selection.":[91],"While":[92],"making":[95],"success":[96],"some":[98],"extent,":[99],"it":[100],"cannot":[101],"reproduce":[102],"all":[103],"crafted":[106],"expert":[108,132],"scientists":[110,134],"reality":[112],"because":[113],"its":[115],"huge":[116],"search":[117],"space.":[118],"In":[119],"this":[120],"paper,":[121],"we":[122],"take":[123],"different":[124],"approach":[125,141,168],"assisting":[127],"process:":[130],"transfer":[131],"much":[137],"possible.":[139],"Proposed":[140],"extracts":[142],"frequently":[143],"used":[144],"operations":[147,181],"from":[148],"source":[149,165,170],"codes":[150,171],"or":[151],"notebooks":[152],"pattern":[154,159],"discovery.":[155],"naive":[157],"textual":[158],"discovery":[160],"performs":[161],"poor":[162],"code,":[166],"our":[167],"converts":[169],"into":[172],"abstract":[173],"syntax":[174],"trees":[175],"discovers":[177],"subgraphs":[183],"performing":[185],"frequent":[186],"subgraph":[187],"mining.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
