{"id":"https://openalex.org/W2913491223","doi":"https://doi.org/10.1109/bigdata.2018.8622049","title":"A Hybrid Approach to Identifying Key Factors in Environmental Health Studies","display_name":"A Hybrid Approach to Identifying Key Factors in Environmental Health Studies","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2913491223","doi":"https://doi.org/10.1109/bigdata.2018.8622049","mag":"2913491223"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622049","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622049","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101947049","display_name":"Shi Dong","orcid":"https://orcid.org/0000-0001-7144-7190"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shi Dong","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091839586","display_name":"Zlatan Feric","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zlatan Feric","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460323","display_name":"Xiangy\u00fc Li","orcid":"https://orcid.org/0000-0002-8301-9959"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiangyu Li","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000744415","display_name":"Sheikh Mokhlesur Rahman","orcid":"https://orcid.org/0000-0002-3174-856X"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheikh Mokhlesur Rahman","raw_affiliation_strings":["Department of Civil and Environmental Engineering, Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Civil and Environmental Engineering, Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412632","display_name":"Guangyu Li","orcid":"https://orcid.org/0000-0002-6338-2507"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangyu Li","raw_affiliation_strings":["Dept. of Civil and Enviromental Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Civil and Enviromental Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107654923","display_name":"Chieh Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chieh Wu","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070498910","display_name":"April Z. Gu","orcid":"https://orcid.org/0000-0002-5099-5531"},"institutions":[{"id":"https://openalex.org/I183697816","display_name":"Bangladesh University of Engineering and Technology","ror":"https://ror.org/05a1qpv97","country_code":"BD","type":"education","lineage":["https://openalex.org/I183697816"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"April Z. Gu","raw_affiliation_strings":["Department of Civil Engineering, Bangladesh University of Engineering and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Civil Engineering, Bangladesh University of Engineering and Technology","institution_ids":["https://openalex.org/I183697816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042038501","display_name":"Jennifer Dy","orcid":"https://orcid.org/0000-0002-8430-134X"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jennifer Dy","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061128237","display_name":"David Kaeli","orcid":"https://orcid.org/0000-0002-5692-0151"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Kaeli","raw_affiliation_strings":["Dept. of Electrical and Computer Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039853092","display_name":"John D. Meeker","orcid":"https://orcid.org/0000-0001-8357-5085"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Meeker","raw_affiliation_strings":["University of Michigan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048222134","display_name":"Ingrid Padilla","orcid":"https://orcid.org/0000-0001-8460-1679"},"institutions":[{"id":"https://openalex.org/I60388903","display_name":"University of Puerto Rico-Mayaguez","ror":"https://ror.org/00wek6x04","country_code":"PR","type":"education","lineage":["https://openalex.org/I200399037","https://openalex.org/I60388903"]}],"countries":["PR"],"is_corresponding":false,"raw_author_name":"Ingrid Y. Padilla","raw_affiliation_strings":["University of Puerto Rico at Mayaguez"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Puerto Rico at Mayaguez","institution_ids":["https://openalex.org/I60388903"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jose Cordero","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jose Cordero","raw_affiliation_strings":["University of Georgia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Georgia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109505401","display_name":"Carmen V\u00e9lez Vega","orcid":null},"institutions":[{"id":"https://openalex.org/I158818660","display_name":"University of Puerto Rico, Medical Sciences Campus","ror":"https://ror.org/00h25w961","country_code":"PR","type":"education","lineage":["https://openalex.org/I158818660","https://openalex.org/I200399037"]}],"countries":["PR"],"is_corresponding":false,"raw_author_name":"Carmen Velez Vega","raw_affiliation_strings":["Graduate School of Public Health, University of Puerto Rico Medical Campus"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Public Health, University of Puerto Rico Medical Campus","institution_ids":["https://openalex.org/I158818660"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071749455","display_name":"Zaira Rosario","orcid":"https://orcid.org/0000-0002-8667-7324"},"institutions":[{"id":"https://openalex.org/I60388903","display_name":"University of Puerto Rico-Mayaguez","ror":"https://ror.org/00wek6x04","country_code":"PR","type":"education","lineage":["https://openalex.org/I200399037","https://openalex.org/I60388903"]}],"countries":["PR"],"is_corresponding":false,"raw_author_name":"Zaira Rosario","raw_affiliation_strings":["University of Puerto Rico at Mayaguez"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Puerto Rico at Mayaguez","institution_ids":["https://openalex.org/I60388903"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039953924","display_name":"Akram N. Alshawabkeh","orcid":"https://orcid.org/0000-0001-5243-4087"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Akram Alshawabkeh","raw_affiliation_strings":["Dept. of Civil and Enviromental Engineering, Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Civil and Enviromental Engineering, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":15,"corresponding_author_ids":["https://openalex.org/A5101947049"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":1.1394,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.7950528,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2855","last_page":"2862"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9556999802589417,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9556999802589417,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9139000177383423,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7164338827133179},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.69854336977005},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6518698930740356},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.6326987147331238},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5538443922996521},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5062695741653442},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4935809373855591},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4534206688404083},{"id":"https://openalex.org/keywords/outcome","display_name":"Outcome (game theory)","score":0.45091232657432556},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4506143629550934},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.45038723945617676},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.41298577189445496}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7164338827133179},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.69854336977005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6518698930740356},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.6326987147331238},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5538443922996521},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5062695741653442},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4935809373855591},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4534206688404083},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.45091232657432556},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4506143629550934},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.45038723945617676},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.41298577189445496},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2018.8622049","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622049","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Good health and well-being","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/3"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W162856236","https://openalex.org/W1499467920","https://openalex.org/W1526441817","https://openalex.org/W2101234009","https://openalex.org/W2145287260","https://openalex.org/W2308085519","https://openalex.org/W2342299174","https://openalex.org/W2473046424","https://openalex.org/W2899771611","https://openalex.org/W3024863989","https://openalex.org/W4205687621","https://openalex.org/W4234556776","https://openalex.org/W6629874031","https://openalex.org/W6756040250","https://openalex.org/W6776964227"],"related_works":["https://openalex.org/W4380150146","https://openalex.org/W3024870410","https://openalex.org/W2410652950","https://openalex.org/W4283773154","https://openalex.org/W3139174110","https://openalex.org/W4289597203","https://openalex.org/W4386799044","https://openalex.org/W2085630472","https://openalex.org/W1977098485","https://openalex.org/W2519761320"],"abstract_inverted_index":{"In":[0,63,160],"recent":[1,31],"years,":[2],"the":[3,55,90,100,106,127,153,178],"availability":[4],"of":[5,39,57,70,89,109,152,198,222,244,282,285,325],"data-driven":[6],"analytics":[7],"has":[8],"become":[9],"a":[10,23,195,218,240,253,260,275,283],"key":[11,323],"tool":[12],"in":[13,15,33,51,59,112,129,330],"discovery":[14,58],"public":[16],"health":[17,76],"and":[18,47,80,170,269,298,308,333],"environmental":[19],"science":[20],"research.":[21],"As":[22],"result,":[24],"these":[25,248,303,337],"communities":[26],"have":[27,251],"looked":[28],"to":[29,43,75,98,162,177,206,228,236],"leverage":[30],"advances":[32],"machine":[34,72,116,287],"learning":[35,73,117,288],"algorithms.":[36,208,338],"This":[37],"class":[38],"algorithms":[40,74,118],"are":[41,215],"able":[42,235],"find":[44],"hidden":[45],"patterns":[46],"develop":[48],"new":[49],"knowledge":[50],"complex":[52,223],"data,":[53,133],"accelerating":[54],"rate":[56,108],"multiple":[60],"research":[61],"domains.":[62],"this":[64],"paper,":[65],"we":[66,214,250,279,313],"present":[67,314],"our":[68,130,165,207,231,319],"methodology":[69],"applying":[71],"outcomes,":[77],"chemical":[78],"exposures,":[79],"social":[81],"behavior":[82],"data":[83,172,200,224,245,267,272],"from":[84],"expectant":[85],"mothers,":[86],"as":[87,321],"part":[88],"NIEHS-supported":[91],"PROTECT":[92,154],"Center.":[93],"The":[94],"ultimate":[95],"goal":[96],"is":[97,212],"determine":[99],"dominant":[101],"factors/features":[102],"potentially":[103],"responsible":[104],"for":[105,122],"high":[107,326],"premature":[110],"births":[111],"Puerto":[113,331],"Rico.Many":[114],"commonly-used":[115],"can":[119],"be":[120,189,234,237],"used":[121],"feature":[123,306],"selection.":[124],"However,":[125],"given":[126],"imbalance":[128],"birth":[131,328],"outcome":[132,163],"with":[134,217,239],"many":[135,157],"more":[136],"term":[137],"(i.e.,":[138,146],"37":[139,149],"weeks":[140],"or":[141,185],"longer)":[142],"versus":[143],"preterm":[144,327],"pregnancies":[145],"less":[147],"than":[148],"weeks),":[150],"analysis":[151,187],"dataset":[155],"presents":[156],"unique":[158],"challenges.":[159],"addition":[161],"imbalance,":[164],"database":[166],"contains":[167],"both":[168,305],"quantitative":[169],"categorical":[171],"variables,":[173],"adding":[174],"some":[175],"complexity":[176],"analytical":[179,256],"methods":[180],"used.":[181],"Applying":[182],"straightforward":[183],"correlation":[184],"regression":[186],"would":[188],"insufficient.":[190],"Our":[191,263],"datasets":[192],"also":[193],"contain":[194],"significant":[196],"amount":[197],"missing":[199,271],"(incomplete":[201],"records),":[202],"providing":[203],"noisy":[204],"input":[205],"A":[209],"further":[210],"challenge":[211],"that":[213],"working":[216],"relatively":[219,241],"limited":[220],"set":[221],"(only":[225],"2000":[226],"participants":[227],"date),":[229],"so":[230],"models":[232],"must":[233],"built":[238],"small":[242],"number":[243,284],"samples.To":[246],"overcome":[247],"challenges,":[249],"implemented":[252],"cus-tomized":[254],"end-to-end":[255],"toolchain":[257],"which":[258],"forms":[259],"pre-processing":[261],"pipeline.":[262],"framework":[264],"performs":[265],"general":[266],"filtering":[268],"handles":[270],"fields":[273],"using":[274],"similarity-based":[276],"approach.":[277],"Next,":[278],"apply":[280],"one":[281],"different":[286],"algorithms,":[289],"including":[290],"Linear":[291],"Correlation,":[292],"Normalized":[293],"Mutual":[294],"Information,":[295],"Logistic":[296],"Regression,":[297],"Decision":[299],"Trees.":[300],"We":[301],"use":[302],"during":[304],"selection":[307],"model":[309,320],"performance":[310],"evaluation.":[311],"Finally,":[312],"top-ranked":[315],"features":[316],"produced":[317],"by":[318],"potential":[322],"contributors":[324],"rates":[329],"Rico,":[332],"discuss":[334],"results":[335],"across":[336]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2025-10-10T00:00:00"}
