{"id":"https://openalex.org/W4307450214","doi":"https://doi.org/10.1007/s11222-022-10138-7","title":"Representative random sampling: an empirical evaluation of a novel bin stratification method for model performance estimation","display_name":"Representative random sampling: an empirical evaluation of a novel bin stratification method for model performance estimation","publication_year":2022,"publication_date":"2022-10-27","ids":{"openalex":"https://openalex.org/W4307450214","doi":"https://doi.org/10.1007/s11222-022-10138-7"},"language":"en","primary_location":{"id":"doi:10.1007/s11222-022-10138-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11222-022-10138-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11222-022-10138-7.pdf","source":{"id":"https://openalex.org/S5437875","display_name":"Statistics and Computing","issn_l":"0960-3174","issn":["0960-3174","1573-1375"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Statistics and Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11222-022-10138-7.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024965387","display_name":"Michael C. Rendleman","orcid":"https://orcid.org/0000-0002-7537-263X"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael C. Rendleman","raw_affiliation_strings":["Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Brian J. Smith","orcid":null},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian J. Smith","raw_affiliation_strings":["Department of Biostatistics, University of Iowa, 145 N. Riverside Drive, 100 CPHB, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, University of Iowa, 145 N. Riverside Drive, 100 CPHB, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064300637","display_name":"Guadalupe Canahuate","orcid":"https://orcid.org/0000-0001-5873-5454"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guadalupe Canahuate","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064945246","display_name":"Terry A. Braun","orcid":"https://orcid.org/0000-0003-0055-337X"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Terry A. Braun","raw_affiliation_strings":["Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","Department of Biomedical Engineering, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]},{"raw_affiliation_string":"Department of Biomedical Engineering, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040980527","display_name":"John M. Buatti","orcid":"https://orcid.org/0000-0001-8499-3721"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John M. Buatti","raw_affiliation_strings":["Department of Radiation Oncology, University of Iowa Carver College of Medicine, 200 Hawkins Drive, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Department of Radiation Oncology, University of Iowa Carver College of Medicine, 200 Hawkins Drive, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011603476","display_name":"Thomas L. Casavant","orcid":"https://orcid.org/0000-0002-8051-4153"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas L. Casavant","raw_affiliation_strings":["Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","Department of Biomedical Engineering, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA"],"affiliations":[{"raw_affiliation_string":"Center for Bioinformatics and Computational Biology, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]},{"raw_affiliation_string":"Department of Biomedical Engineering, University of Iowa, 5017 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Iowa, 4016 Seamans Center, Iowa City, IA, 52242, USA","institution_ids":["https://openalex.org/I126307644"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5024965387"],"corresponding_institution_ids":["https://openalex.org/I126307644"],"apc_list":{"value":2090,"currency":"EUR","value_usd":2690},"apc_paid":{"value":2090,"currency":"EUR","value_usd":2690},"fwci":0.7811,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.69646212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"32","issue":"6","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11287","display_name":"Cancer Genomics and Diagnostics","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11970","display_name":"Molecular Biology Techniques and Applications","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.7562737464904785},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.597189724445343},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5298184156417847},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.49607786536216736},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.48632675409317017},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4851875901222229},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.45470601320266724},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.4213753342628479},{"id":"https://openalex.org/keywords/cross-validation","display_name":"Cross-validation","score":0.4205610156059265},{"id":"https://openalex.org/keywords/stratified-sampling","display_name":"Stratified sampling","score":0.4159452021121979},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3604532480239868},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.35771825909614563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3169318437576294},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26885291934013367}],"concepts":[{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.7562737464904785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.597189724445343},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5298184156417847},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.49607786536216736},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.48632675409317017},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4851875901222229},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.45470601320266724},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.4213753342628479},{"id":"https://openalex.org/C27181475","wikidata":"https://www.wikidata.org/wiki/Q541014","display_name":"Cross-validation","level":2,"score":0.4205610156059265},{"id":"https://openalex.org/C49898467","wikidata":"https://www.wikidata.org/wiki/Q1517706","display_name":"Stratified sampling","level":2,"score":0.4159452021121979},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3604532480239868},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.35771825909614563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3169318437576294},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26885291934013367},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11222-022-10138-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11222-022-10138-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11222-022-10138-7.pdf","source":{"id":"https://openalex.org/S5437875","display_name":"Statistics and Computing","issn_l":"0960-3174","issn":["0960-3174","1573-1375"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Statistics and Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11222-022-10138-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11222-022-10138-7","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11222-022-10138-7.pdf","source":{"id":"https://openalex.org/S5437875","display_name":"Statistics and Computing","issn_l":"0960-3174","issn":["0960-3174","1573-1375"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Statistics and Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1760664914","display_name":null,"funder_award_id":"U01CA140206","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"},{"id":"https://openalex.org/G3885784125","display_name":null,"funder_award_id":"U01CA140206","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4307450214.pdf","grobid_xml":"https://content.openalex.org/works/W4307450214.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1875061881","https://openalex.org/W1986241711","https://openalex.org/W2054440265","https://openalex.org/W2136916987","https://openalex.org/W2138986862","https://openalex.org/W2143481518","https://openalex.org/W2952540014","https://openalex.org/W2969625665","https://openalex.org/W2971667762","https://openalex.org/W3048191395","https://openalex.org/W4246259708","https://openalex.org/W6602130902"],"related_works":["https://openalex.org/W2387471420","https://openalex.org/W3042964584","https://openalex.org/W2356880469","https://openalex.org/W2383809451","https://openalex.org/W4206840145","https://openalex.org/W2370014976","https://openalex.org/W2350399852","https://openalex.org/W2378407977","https://openalex.org/W3047864323","https://openalex.org/W2075598034"],"abstract_inverted_index":{"Abstract":[0],"High-dimensional":[1],"cancer":[2],"data":[3,172],"can":[4,214],"be":[5,24],"burdensome":[6],"to":[7,26,47,166],"analyze,":[8],"with":[9,29,188,197,210],"complex":[10],"relationships":[11,28,150],"between":[12,151],"molecular":[13,171],"measurements,":[14],"clinical":[15,31],"diagnostics,":[16],"and":[17,99,158,184],"treatment":[18],"outcomes.":[19],"Data-driven":[20],"computational":[21],"approaches":[22,43,202],"may":[23],"key":[25],"identifying":[27],"potential":[30],"or":[32,72],"research":[33],"use.":[34],"To":[35,120],"this":[36,113],"end,":[37],"reliable":[38,123,217],"comparison":[39],"of":[40,59,94,118,143],"feature":[41,102,219],"engineering":[42,103,220],"in":[44,63,96,101,182],"their":[45],"ability":[46],"support":[48],"machine":[49],"learning":[50],"survival":[51],"modeling":[52],"is":[53,139],"crucial.":[54],"With":[55],"the":[56,92,116,204],"limited":[57,211],"number":[58],"cases":[60],"often":[61],"present":[62],"multi-omics":[64],"datasets":[65],"(\u201cbig":[66],"p":[67],",":[68],"little":[69],"n":[70],",\u201d":[71],"many":[73],"features,":[74],"few":[75],"subjects),":[76],"a":[77,130,140,159],"resampling":[78,201],"approach":[79],"such":[80],"as":[81],"cross":[82],"validation":[83],"(CV)":[84],"would":[85,111],"provide":[86,121],"robust":[87],"model":[88,125],"performance":[89,126,208],"estimates":[90],"at":[91,115],"cost":[93],"flexibility":[95,114],"intermediate":[97],"assessments":[98],"exploration":[100],"approaches.":[104],"A":[105],"holdout":[106],"(HO)":[107],"estimation":[108,209],"approach,":[109],"however,":[110],"permit":[112],"expense":[117],"reliability.":[119],"more":[122,192,216],"HO-based":[124],"estimates,":[127],"we":[128],"propose":[129],"novel":[131],"sampling":[132,136],"procedure:":[133],"representative":[134],"random":[135,152],"(RRS).":[137],"RRS":[138,168],"special":[141],"case":[142],"continuous":[144,160],"bin":[145],"stratification":[146],"which":[147],"minimizes":[148],"significant":[149,180],"HO":[153,176],"groupings":[154],"(or":[155],"CV":[156],"folds)":[157],"outcome.":[161],"Monte":[162],"Carlo":[163],"simulations":[164],"used":[165],"evaluate":[167],"on":[169],"synthetic":[170],"indicated":[173],"that":[174],"RRS-based":[175,198],"(RRHO)":[177],"yields":[178],"statistically":[179],"reductions":[181,194],"error":[183],"bias":[185],"when":[186],"compared":[187],"standard":[189,222],"HO.":[190,223],"Similarly,":[191],"consistent":[193],"are":[195,203],"observed":[196],"CV.":[199],"While":[200],"ideal":[205],"choice":[206],"for":[207],"data,":[212],"RRHO":[213],"enable":[215],"exploratory":[218],"than":[221]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
