{"id":"https://openalex.org/W4386204749","doi":"https://doi.org/10.1145/3603719.3603744","title":"Data Driven Dimensionality Reduction to Improve Modeling Performance\u2731","display_name":"Data Driven Dimensionality Reduction to Improve Modeling Performance\u2731","publication_year":2023,"publication_date":"2023-07-10","ids":{"openalex":"https://openalex.org/W4386204749","doi":"https://doi.org/10.1145/3603719.3603744"},"language":"en","primary_location":{"id":"doi:10.1145/3603719.3603744","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3603719.3603744","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3603719.3603744","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"35th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3603719.3603744","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084113618","display_name":"Joshua Chung","orcid":"https://orcid.org/0009-0005-3293-3758"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joshua Chung","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, USA"],"raw_orcid":"https://orcid.org/0009-0005-3293-3758","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011615735","display_name":"Marcos L\u00f3pez de Prado","orcid":"https://orcid.org/0000-0002-4107-3797"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcos Lopez De Prado","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, USA"],"raw_orcid":"https://orcid.org/0000-0002-4107-3797","affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066426676","display_name":"Horst D. Simon","orcid":"https://orcid.org/0000-0003-0832-3720"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Horst Simon","raw_affiliation_strings":["ADIA Lab, United Arab Emirates"],"raw_orcid":"https://orcid.org/0000-0003-0832-3720","affiliations":[{"raw_affiliation_string":"ADIA Lab, United Arab Emirates","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043129695","display_name":"Kesheng Wu","orcid":"https://orcid.org/0000-0002-6907-3393"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kesheng Wu","raw_affiliation_strings":["Berkeley Lab, USA"],"raw_orcid":"https://orcid.org/0000-0002-6907-3393","affiliations":[{"raw_affiliation_string":"Berkeley Lab, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4856,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.70839987,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8218532800674438},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6662845611572266},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.6110044121742249},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.6033020615577698},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.565324068069458},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5536881685256958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5250577926635742},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5129361152648926},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.48533907532691956},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.45225784182548523}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8218532800674438},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6662845611572266},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.6110044121742249},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.6033020615577698},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.565324068069458},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5536881685256958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5250577926635742},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5129361152648926},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.48533907532691956},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.45225784182548523}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3603719.3603744","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3603719.3603744","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3603719.3603744","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"35th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt0555v6rb","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/0555v6rb","pdf_url":"https://escholarship.org/content/qt0555v6rb/qt0555v6rb.pdf","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3603719.3603744","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3603719.3603744","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3603719.3603744","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"35th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8100000023841858}],"awards":[{"id":"https://openalex.org/G1286236842","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1677143136","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G1799333409","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320337506","funder_display_name":"Advanced Scientific Computing Research"},{"id":"https://openalex.org/G2503023272","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3083819904","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G4501827968","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4565140552","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5076365615","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G5296923526","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G5375843722","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320337506","funder_display_name":"Advanced Scientific Computing Research"},{"id":"https://openalex.org/G542631466","display_name":null,"funder_award_id":"No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G5614806141","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G6348972864","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7672463193","display_name":null,"funder_award_id":"No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G805243471","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320317220","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386204749.pdf","grobid_xml":"https://content.openalex.org/works/W4386204749.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1863336885","https://openalex.org/W2001141328","https://openalex.org/W2027461913","https://openalex.org/W2031874479","https://openalex.org/W2053186076","https://openalex.org/W2100218206","https://openalex.org/W2121224351","https://openalex.org/W2137570937","https://openalex.org/W2161736907","https://openalex.org/W2346467685","https://openalex.org/W2586047144","https://openalex.org/W2602458379","https://openalex.org/W2613597870","https://openalex.org/W2618851150","https://openalex.org/W2754029504","https://openalex.org/W2903777941","https://openalex.org/W2910027896","https://openalex.org/W2910971961","https://openalex.org/W2949676527","https://openalex.org/W2950336271","https://openalex.org/W2962862931","https://openalex.org/W2995843219","https://openalex.org/W2999546058","https://openalex.org/W3006612813","https://openalex.org/W3011249019","https://openalex.org/W3024333932","https://openalex.org/W3045004532","https://openalex.org/W3120671187","https://openalex.org/W3121675868","https://openalex.org/W3125595766","https://openalex.org/W3131860561","https://openalex.org/W3179950556","https://openalex.org/W4206095984","https://openalex.org/W4223998738","https://openalex.org/W4233454717","https://openalex.org/W4247541400","https://openalex.org/W6922407243"],"related_works":["https://openalex.org/W1995622179","https://openalex.org/W1484111231","https://openalex.org/W4391160746","https://openalex.org/W1552543208","https://openalex.org/W2074396517","https://openalex.org/W2166963679","https://openalex.org/W2187269125","https://openalex.org/W1641615907","https://openalex.org/W3089231081","https://openalex.org/W4411583871"],"abstract_inverted_index":{"In":[0,13,214],"a":[1,43,52,65,104,113,127,167,219],"number":[2,129],"of":[3,55,78,86,115,130,169,193,216],"applications,":[4],"data":[5,172],"may":[6],"be":[7],"anonymized,":[8],"obfuscated,":[9],"or":[10,23],"highly":[11],"noisy.":[12],"such":[14,32],"cases,":[15],"it":[16],"is":[17,157,208],"difficult":[18],"to":[19,26,80,125,152,159,210],"use":[20],"domain":[21,181],"knowledge":[22],"low-dimensional":[24,49],"visualizations":[25],"engineer":[27],"the":[28,83,94,121,174,183,189,204,230],"features":[29,87,176],"for":[30,46,150],"tasks":[31],"as":[33,42],"machine":[34],"learning,":[35],"instead,":[36],"we":[37,63,102],"explore":[38],"dimensionality":[39],"reduction":[40],"(DR)":[41],"data-driven":[44],"approach":[45],"engineering":[47],"these":[48,100],"representations.":[50],"Through":[51],"careful":[53],"examination":[54],"available":[56],"feature":[57,60,69,222],"selection":[58],"and":[59,88,133,137,156,199,234],"extraction":[61],"techniques,":[62],"propose":[64],"new":[66,72],"class":[67],"named":[68],"clustering.":[70],"These":[71],"methods":[73],"could":[74],"utilize":[75],"different":[76,92,238],"forms":[77],"clustering":[79,223],"help":[81],"evaluate":[82,99],"relative":[84],"importance":[85],"take":[89],"on":[90,112,142,180,236],"properties":[91],"from":[93],"well-known":[95,231],"DR":[96,185,227],"algorithms.":[97],"To":[98],"algorithms,":[101],"develop":[103],"parallel":[105,122],"computing":[106,123],"framework":[107,119,147],"that":[108],"optimizes":[109],"their":[110],"hyperparameters":[111],"sample":[114],"application":[116],"datasets.":[117],"This":[118,145,202],"harnesses":[120],"power":[124],"examine":[126,160],"large":[128],"parameter":[131,162],"combinations":[132],"enables":[134],"hyperparameter":[135],"tuning":[136,139],"model":[138],"purely":[140],"based":[141,179],"observed":[143],"performance.":[144],"optimization":[146,206],"provides":[148],"mechanism":[149],"users":[151],"control":[153],"computational":[154],"cost":[155],"able":[158,209],"many":[161],"choices":[163],"in":[164],"seconds.":[165],"On":[166],"set":[168],"building":[170,194],"energy":[171],"where":[173],"key":[175],"are":[177],"known":[178,212],"knowledge,":[182],"optimized":[184],"algorithms":[186,228],"indeed":[187],"identify":[188],"expected":[190],"main":[191],"drivers":[192],"electricity":[195],"usage:":[196],"outdoor":[197],"temperature":[198],"solar":[200],"radiance.":[201],"shows":[203],"automated":[205],"procedure":[207],"find":[211],"features.":[213],"terms":[215],"modeling":[217],"accuracy,":[218],"distance":[220],"correlation-based":[221],"method":[224],"outperforms":[225],"other":[226],"including":[229],"KPCA,":[232],"LLE,":[233],"UMAP":[235],"two":[237],"tests.":[239]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
