{"id":"https://openalex.org/W2896236534","doi":"https://doi.org/10.1109/mci.2018.2866730","title":"Cross-Validation for Imbalanced Datasets: Avoiding Overoptimistic and Overfitting Approaches [Research Frontier]","display_name":"Cross-Validation for Imbalanced Datasets: Avoiding Overoptimistic and Overfitting Approaches [Research Frontier]","publication_year":2018,"publication_date":"2018-10-15","ids":{"openalex":"https://openalex.org/W2896236534","doi":"https://doi.org/10.1109/mci.2018.2866730","mag":"2896236534"},"language":"en","primary_location":{"id":"doi:10.1109/mci.2018.2866730","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mci.2018.2866730","pdf_url":null,"source":{"id":"https://openalex.org/S104797584","display_name":"IEEE Computational Intelligence Magazine","issn_l":"1556-603X","issn":["1556-603X","1556-6048"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computational Intelligence Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017034254","display_name":"Miriam Seoane Santos","orcid":"https://orcid.org/0000-0002-5912-963X"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Miriam Seoane Santos","raw_affiliation_strings":["Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jastin Pompeu Soares","orcid":null},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jastin Pompeu Soares","raw_affiliation_strings":["Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065859612","display_name":"Pedro Henriques Abreu","orcid":"https://orcid.org/0000-0002-9278-8194"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Pedro Henrigues Abreu","raw_affiliation_strings":["Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023181397","display_name":"H\u00e9lder Ara\u00fajo","orcid":"https://orcid.org/0000-0002-9544-424X"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Helder Araujo","raw_affiliation_strings":["Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics Engineering, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082299802","display_name":"Jo\u00e3o Santos","orcid":"https://orcid.org/0000-0003-2465-5143"},"institutions":[{"id":"https://openalex.org/I4210111164","display_name":"IPO Porto","ror":"https://ror.org/027ras364","country_code":"PT","type":"healthcare","lineage":["https://openalex.org/I4210111164"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Joao Santos","raw_affiliation_strings":["IPO-Porto Research Centre, Porto, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IPO-Porto Research Centre, Porto, Portugal","institution_ids":["https://openalex.org/I4210111164"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":19.5988,"has_fulltext":false,"cited_by_count":401,"citation_normalized_percentile":{"value":0.99372646,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"13","issue":"4","first_page":"59","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11443","display_name":"Advanced Statistical Process Monitoring","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.9677936434745789},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.9313234090805054},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7167984247207642},{"id":"https://openalex.org/keywords/cross-validation","display_name":"Cross-validation","score":0.6054673790931702},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6046707034111023},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5627590417861938},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5218400359153748},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.45992761850357056},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.45137423276901245},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.16245192289352417},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.08792191743850708},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08273822069168091}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.9677936434745789},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.9313234090805054},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7167984247207642},{"id":"https://openalex.org/C27181475","wikidata":"https://www.wikidata.org/wiki/Q541014","display_name":"Cross-validation","level":2,"score":0.6054673790931702},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6046707034111023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5627590417861938},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5218400359153748},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.45992761850357056},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.45137423276901245},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16245192289352417},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.08792191743850708},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08273822069168091},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mci.2018.2866730","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mci.2018.2866730","pdf_url":null,"source":{"id":"https://openalex.org/S104797584","display_name":"IEEE Computational Intelligence Magazine","issn_l":"1556-603X","issn":["1556-603X","1556-6048"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computational Intelligence Magazine","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1492567620","https://openalex.org/W1496056137","https://openalex.org/W1591261915","https://openalex.org/W1592804209","https://openalex.org/W1993220166","https://openalex.org/W2011376672","https://openalex.org/W2016710902","https://openalex.org/W2040260156","https://openalex.org/W2051224630","https://openalex.org/W2053724458","https://openalex.org/W2078885513","https://openalex.org/W2081697244","https://openalex.org/W2085487226","https://openalex.org/W2085770564","https://openalex.org/W2087240369","https://openalex.org/W2088059023","https://openalex.org/W2097521902","https://openalex.org/W2104933073","https://openalex.org/W2107686700","https://openalex.org/W2115969689","https://openalex.org/W2118978333","https://openalex.org/W2125877832","https://openalex.org/W2132791018","https://openalex.org/W2137195266","https://openalex.org/W2146369703","https://openalex.org/W2148143831","https://openalex.org/W2157825442","https://openalex.org/W2164330572","https://openalex.org/W2167464971","https://openalex.org/W2171141701","https://openalex.org/W2226725405","https://openalex.org/W2297432279","https://openalex.org/W2300921526","https://openalex.org/W2471967309","https://openalex.org/W2512345558","https://openalex.org/W2518774129","https://openalex.org/W2530103281","https://openalex.org/W2548330752","https://openalex.org/W2550511032","https://openalex.org/W2555835967","https://openalex.org/W2562319768","https://openalex.org/W2598548966","https://openalex.org/W2607312241","https://openalex.org/W2999729612","https://openalex.org/W6629480399","https://openalex.org/W6635474240","https://openalex.org/W6675634716","https://openalex.org/W6729211661"],"related_works":["https://openalex.org/W1574414179","https://openalex.org/W4362597605","https://openalex.org/W4389965896","https://openalex.org/W4387747402","https://openalex.org/W3119578451","https://openalex.org/W2972423375","https://openalex.org/W2149651625","https://openalex.org/W2801469686","https://openalex.org/W2149903055","https://openalex.org/W1983416467"],"abstract_inverted_index":{"Although":[0],"cross-validation":[1],"is":[2,30],"a":[3],"standard":[4],"procedure":[5],"for":[6,18],"performance":[7],"evaluation,":[8],"its":[9],"joint":[10],"application":[11,32],"with":[12],"oversampling":[13,34],"remains":[14],"an":[15],"open":[16],"question":[17],"researchers":[19],"farther":[20],"from":[21],"the":[22,31,37],"imbalanced":[23],"data":[24],"topic.":[25],"A":[26],"frequent":[27],"experimental":[28],"flaw":[29],"of":[33],"algorithms":[35],"to":[36],"entire":[38],"dataset,":[39],"resulting":[40],"in":[41],"biased":[42],"models":[43],"and":[44],"overly-optimistic":[45],"estimates.":[46]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":59},{"year":2024,"cited_by_count":61},{"year":2023,"cited_by_count":73},{"year":2022,"cited_by_count":81},{"year":2021,"cited_by_count":59},{"year":2020,"cited_by_count":44},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
