{"id":"https://openalex.org/W4408520167","doi":"https://doi.org/10.1109/tbdata.2025.3552343","title":"Optimal Subdata Selection for Prediction Based on the Distribution of the Covariates","display_name":"Optimal Subdata Selection for Prediction Based on the Distribution of the Covariates","publication_year":2025,"publication_date":"2025-03-17","ids":{"openalex":"https://openalex.org/W4408520167","doi":"https://doi.org/10.1109/tbdata.2025.3552343"},"language":"en","primary_location":{"id":"doi:10.1109/tbdata.2025.3552343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tbdata.2025.3552343","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/tbdata.2025.3552343","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116657851","display_name":"Alvaro Cia-Mina","orcid":"https://orcid.org/0000-0003-0081-4074"},"institutions":[{"id":"https://openalex.org/I88155538","display_name":"Universidad de Navarra","ror":"https://ror.org/02rxc7m23","country_code":"ES","type":"education","lineage":["https://openalex.org/I88155538"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Alvaro Cia-Mina","raw_affiliation_strings":["Institute of Data Science and Artificial Intelligence (DATAI), Universidad de Navarra, Pamplona, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Data Science and Artificial Intelligence (DATAI), Universidad de Navarra, Pamplona, Spain","institution_ids":["https://openalex.org/I88155538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003089044","display_name":"Jes\u00fas L\u00f3pez\u2013Fidalgo","orcid":"https://orcid.org/0000-0001-7502-8188"},"institutions":[{"id":"https://openalex.org/I88155538","display_name":"Universidad de Navarra","ror":"https://ror.org/02rxc7m23","country_code":"ES","type":"education","lineage":["https://openalex.org/I88155538"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jesus Lopez-Fidalgo","raw_affiliation_strings":["Institute of Data Science and Artificial Intelligence (DATAI), Universidad de Navarra, Pamplona, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Data Science and Artificial Intelligence (DATAI), Universidad de Navarra, Pamplona, Spain","institution_ids":["https://openalex.org/I88155538"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050143476","display_name":"Weng Kee Wong","orcid":"https://orcid.org/0000-0001-5568-3054"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weng Kee Wong","raw_affiliation_strings":["Department of Biostatistics, University of California, Los Angeles, CA, USA","Department of Biostatistics, University of California at Los Angeles, California, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, University of California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"Department of Biostatistics, University of California at Los Angeles, California, USA","institution_ids":["https://openalex.org/I161318765"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5116657851"],"corresponding_institution_ids":["https://openalex.org/I88155538"],"apc_list":null,"apc_paid":null,"fwci":5.4906,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94741122,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"11","issue":"5","first_page":"2601","last_page":"2614"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9214000105857849,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9214000105857849,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7501389980316162},{"id":"https://openalex.org/keywords/covariate","display_name":"Covariate","score":0.7318788766860962},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5995590686798096},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33263909816741943},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32182687520980835},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.27888602018356323}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7501389980316162},{"id":"https://openalex.org/C119043178","wikidata":"https://www.wikidata.org/wiki/Q320723","display_name":"Covariate","level":2,"score":0.7318788766860962},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5995590686798096},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33263909816741943},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32182687520980835},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27888602018356323}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tbdata.2025.3552343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tbdata.2025.3552343","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/tbdata.2025.3552343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tbdata.2025.3552343","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W170943188","https://openalex.org/W2001568861","https://openalex.org/W2017351261","https://openalex.org/W2048144055","https://openalex.org/W2055389381","https://openalex.org/W2076524037","https://openalex.org/W2592576905","https://openalex.org/W2596535828","https://openalex.org/W2625919377","https://openalex.org/W2742855836","https://openalex.org/W2766451779","https://openalex.org/W2904739162","https://openalex.org/W2990342459","https://openalex.org/W2999594302","https://openalex.org/W3032084519","https://openalex.org/W3044420422","https://openalex.org/W3047634786","https://openalex.org/W3097162844","https://openalex.org/W3126179019","https://openalex.org/W3133772580","https://openalex.org/W3135219656","https://openalex.org/W4210703010","https://openalex.org/W4214812430","https://openalex.org/W4230733463","https://openalex.org/W4320491312","https://openalex.org/W4360838431","https://openalex.org/W4367186682"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2985746494","https://openalex.org/W4206042385","https://openalex.org/W2511384863","https://openalex.org/W2096089271","https://openalex.org/W2923628599","https://openalex.org/W2014100433","https://openalex.org/W2051519658"],"abstract_inverted_index":{"Huge":[0],"data":[1,20,117],"sets":[2],"are":[3,95],"widely":[4],"available":[5],"now":[6],"and":[7,26,92,107,115,127],"there":[8],"is":[9,66],"growing":[10],"interest":[11],"in":[12],"selecting":[13],"an":[14,79,137],"optimal":[15,42,80,138],"subsample":[16,139],"from":[17,140],"the":[18,47,54,58,71,99,128],"full":[19],"set":[21],"to":[22,75,135],"improve":[23],"inference":[24],"efficiency":[25],"reduce":[27],"labeling":[28],"costs.":[29],"We":[30],"propose":[31],"a":[32,40,84],"new":[33],"criterion":[34,44],"called":[35],"J\u2013optimality,":[36],"that":[37,45,67,76],"builds":[38],"upon":[39],"popular":[41],"selection":[43,73],"minimizes":[46],"Random\u2013X":[48],"prediction":[49],"error":[50],"by":[51],"additionally":[52],"incorporating":[53],"joint":[55],"distribution":[56],"of":[57,63,77],"covariates.":[59],"A":[60],"key":[61],"advantage":[62],"our":[64,120],"approach":[65],"we":[68],"can":[69,131],"relate":[70],"subsampling":[72,101,125],"problem":[74],"finding":[78,91,111],"approximate":[81],"design":[82],"under":[83],"convex":[85],"criterion,":[86],"where":[87],"analytical":[88],"tools":[89],"for":[90,110],"studying":[93],"them":[94],"already":[96],"available.":[97],"Consequently,":[98],"J\u2013optimal":[100],"method":[102],"comes":[103],"with":[104],"theoretical":[105],"results":[106,114],"theory-based":[108],"algorithms":[109,130],"them.":[112],"Simulation":[113],"real":[116],"analysis":[118],"show":[119],"proposed":[121,129],"methods":[122,126],"outperform":[123],"current":[124],"also":[132],"adapt":[133],"efficiently":[134],"select":[136],"streaming":[141],"data.":[142]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
