{"id":"https://openalex.org/W4414425407","doi":"https://doi.org/10.1145/3769077","title":"Stable Subsampling under Model Misspecification and Covariate Shift","display_name":"Stable Subsampling under Model Misspecification and Covariate Shift","publication_year":2025,"publication_date":"2025-09-23","ids":{"openalex":"https://openalex.org/W4414425407","doi":"https://doi.org/10.1145/3769077"},"language":"en","primary_location":{"id":"doi:10.1145/3769077","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769077","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102341160","display_name":"Jinjing Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinjing Yang","raw_affiliation_strings":["NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","NITFID, School of Statistics and Data Science, Nankai University, China"],"raw_orcid":"https://orcid.org/0009-0003-0627-685X","affiliations":[{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]},{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100965566","display_name":"Shaohua Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaohua Xu","raw_affiliation_strings":["NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","NITFID, School of Statistics and Data Science, Nankai University, China"],"raw_orcid":"https://orcid.org/0009-0008-0793-9391","affiliations":[{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]},{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065081397","display_name":"Zebin Yang","orcid":"https://orcid.org/0000-0001-5683-7502"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zebin Yang","raw_affiliation_strings":["Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong, Hong Kong","Department of Statistics and Actuarial Science, The University of Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0001-5683-7502","affiliations":[{"raw_affiliation_string":"Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Department of Statistics and Actuarial Science, The University of Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101664060","display_name":"Aijun Zhang","orcid":"https://orcid.org/0000-0001-9729-9018"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Aijun Zhang","raw_affiliation_strings":["Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong, Hong Kong","Department of Statistics and Actuarial Science, The University of Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0001-9729-9018","affiliations":[{"raw_affiliation_string":"Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]},{"raw_affiliation_string":"Department of Statistics and Actuarial Science, The University of Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101703906","display_name":"Yongdao Zhou","orcid":"https://orcid.org/0000-0003-3805-7021"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdao Zhou","raw_affiliation_strings":["NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","NITFID, School of Statistics and Data Science, Nankai University, China"],"raw_orcid":"https://orcid.org/0000-0003-3805-7021","affiliations":[{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]},{"raw_affiliation_string":"NITFID, School of Statistics and Data Science, Nankai University, China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2174315,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":"9","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/covariate","display_name":"Covariate","score":0.8906000256538391},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6751000285148621},{"id":"https://openalex.org/keywords/independence","display_name":"Independence (probability theory)","score":0.6366000175476074},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5202999711036682},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.46239998936653137},{"id":"https://openalex.org/keywords/conditional-independence","display_name":"Conditional independence","score":0.4047999978065491},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.4018999934196472},{"id":"https://openalex.org/keywords/generalized-linear-model","display_name":"Generalized linear model","score":0.37630000710487366},{"id":"https://openalex.org/keywords/linear-regression","display_name":"Linear regression","score":0.3668999969959259}],"concepts":[{"id":"https://openalex.org/C119043178","wikidata":"https://www.wikidata.org/wiki/Q320723","display_name":"Covariate","level":2,"score":0.8906000256538391},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6751000285148621},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.6366000175476074},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.541700005531311},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5202999711036682},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.46239998936653137},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.45660001039505005},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41850000619888306},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C41587187","wikidata":"https://www.wikidata.org/wiki/Q1501882","display_name":"Generalized linear model","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35109999775886536},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.34790000319480896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3441999852657318},{"id":"https://openalex.org/C167085575","wikidata":"https://www.wikidata.org/wiki/Q6803654","display_name":"Mean squared prediction error","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C17137986","wikidata":"https://www.wikidata.org/wiki/Q215067","display_name":"Orthogonality","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C2988709989","wikidata":"https://www.wikidata.org/wiki/Q85784623","display_name":"Mean square","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C77350462","wikidata":"https://www.wikidata.org/wiki/Q1125472","display_name":"Confounding","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.28459998965263367},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C163175372","wikidata":"https://www.wikidata.org/wiki/Q3339222","display_name":"Linear model","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C19619285","wikidata":"https://www.wikidata.org/wiki/Q196372","display_name":"Observational error","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769077","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769077","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6152136666","display_name":null,"funder_award_id":"12131001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1593532658","https://openalex.org/W1920962657","https://openalex.org/W1999542471","https://openalex.org/W2040406273","https://openalex.org/W2079315534","https://openalex.org/W2102201073","https://openalex.org/W2120817734","https://openalex.org/W2126034719","https://openalex.org/W2132324013","https://openalex.org/W2134740045","https://openalex.org/W2413343461","https://openalex.org/W2498260651","https://openalex.org/W2552949058","https://openalex.org/W2596535828","https://openalex.org/W2766451779","https://openalex.org/W2915480215","https://openalex.org/W2963062793","https://openalex.org/W3007501395","https://openalex.org/W3016824580","https://openalex.org/W3033161486","https://openalex.org/W3097162844","https://openalex.org/W3099924168","https://openalex.org/W3118133300","https://openalex.org/W3158480457","https://openalex.org/W4206189040","https://openalex.org/W4244779605","https://openalex.org/W4287024952","https://openalex.org/W4387743165"],"related_works":[],"abstract_inverted_index":{"The":[0],"presence":[1],"of":[2,67,73,134],"covariate":[3,142],"shift":[4],"between":[5],"training":[6,25],"and":[7,52,102,141],"test":[8],"datasets,":[9,123,130],"coupled":[10],"with":[11,28,70],"model":[12,139],"misspecification,":[13],"can":[14],"lead":[15],"to":[16,60],"instability":[17],"in":[18,112],"regression":[19],"predictions":[20],"across":[21],"diverse":[22],"datasets.":[23],"Meanwhile,":[24],"complex":[26],"models":[27],"massive":[29],"data":[30,81],"imposes":[31],"significant":[32],"computational":[33],"burden.":[34],"In":[35],"this":[36],"article,":[37],"we":[38],"present":[39],"a":[40],"novel":[41],"model-free":[42],"subsampling":[43,57,65],"algorithm":[44,58],"for":[45],"stable":[46,85],"prediction,":[47],"which":[48],"employs":[49],"uniform":[50,68,92],"design":[51,69],"confounder":[53],"balancing":[54],"methods.":[55],"Our":[56],"aims":[59],"find":[61],"the":[62,71,80,91,95,103,108,132],"nearest":[63],"neighbor":[64],"points":[66],"goal":[72],"minimizing":[74],"global":[75,104],"stability":[76,105],"loss,":[77],"thereby":[78],"reducing":[79],"volume":[82],"while":[83],"achieving":[84],"predictions.":[86],"Theoretic":[87],"analyses":[88],"show":[89],"that":[90],"measure":[93],"minimizes":[94],"maximum":[96],"integrated":[97],"mean":[98],"square":[99],"error":[100],"(MIMSE)":[101],"loss":[106],"evaluates":[107],"independence":[109],"among":[110],"variables":[111],"each":[113],"candidate":[114],"MIMSE-optimal":[115],"subsampled":[116],"sets.":[117],"Simulation":[118],"studies":[119],"conducted":[120],"on":[121,128],"synthetic":[122],"as":[124,126],"well":[125],"applications":[127],"real":[129],"demonstrate":[131],"superiority":[133],"our":[135],"proposed":[136],"method":[137],"under":[138],"misspecification":[140],"shift.":[143]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
