{"id":"https://openalex.org/W4403946933","doi":"https://doi.org/10.1007/s10664-024-10554-5","title":"The effect of data complexity on classifier performance","display_name":"The effect of data complexity on classifier performance","publication_year":2024,"publication_date":"2024-10-31","ids":{"openalex":"https://openalex.org/W4403946933","doi":"https://doi.org/10.1007/s10664-024-10554-5","pmid":"https://pubmed.ncbi.nlm.nih.gov/39494321"},"language":"en","primary_location":{"id":"doi:10.1007/s10664-024-10554-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-024-10554-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-024-10554-5.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10664-024-10554-5.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036760496","display_name":"Jens Eberlein","orcid":"https://orcid.org/0000-0002-6268-1767"},"institutions":[{"id":"https://openalex.org/I124261462","display_name":"Oxford Brookes University","ror":"https://ror.org/04v2twj65","country_code":"GB","type":"education","lineage":["https://openalex.org/I124261462"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jonas Eberlein","raw_affiliation_strings":["School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK"],"affiliations":[{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","institution_ids":["https://openalex.org/I124261462"]},{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK","institution_ids":["https://openalex.org/I124261462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033472437","display_name":"Daniel Rodr\u00edguez","orcid":"https://orcid.org/0000-0002-2887-0185"},"institutions":[{"id":"https://openalex.org/I124261462","display_name":"Oxford Brookes University","ror":"https://ror.org/04v2twj65","country_code":"GB","type":"education","lineage":["https://openalex.org/I124261462"]},{"id":"https://openalex.org/I189268942","display_name":"Universidad de Alcal\u00e1","ror":"https://ror.org/04pmn0e78","country_code":"ES","type":"education","lineage":["https://openalex.org/I189268942"]}],"countries":["ES","GB"],"is_corresponding":false,"raw_author_name":"Daniel Rodriguez","raw_affiliation_strings":["Dept of Computer Science, University of Alcala, Alcal\u00e1 de Henares, Madrid, 28805 Spain","School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK"],"affiliations":[{"raw_affiliation_string":"Dept of Computer Science, University of Alcala, Alcal\u00e1 de Henares, Madrid, 28805 Spain","institution_ids":["https://openalex.org/I189268942"]},{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","institution_ids":["https://openalex.org/I124261462"]},{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK","institution_ids":["https://openalex.org/I124261462"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039467248","display_name":"Rachel Harrison","orcid":"https://orcid.org/0000-0002-0636-7546"},"institutions":[{"id":"https://openalex.org/I124261462","display_name":"Oxford Brookes University","ror":"https://ror.org/04v2twj65","country_code":"GB","type":"education","lineage":["https://openalex.org/I124261462"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rachel Harrison","raw_affiliation_strings":["School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK"],"affiliations":[{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3 0BP UK","institution_ids":["https://openalex.org/I124261462"]},{"raw_affiliation_string":"School of Technology, Oxford Brookes University, Headington Campus, Oxford, OX3, 0BP, UK","institution_ids":["https://openalex.org/I124261462"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5036760496"],"corresponding_institution_ids":["https://openalex.org/I124261462"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":4.6292,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.95272809,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"30","issue":"1","first_page":"16","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9595000147819519,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5607358813285828},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4635222852230072},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42024779319763184},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38275253772735596},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34163594245910645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5607358813285828},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4635222852230072},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42024779319763184},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38275253772735596},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34163594245910645}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/s10664-024-10554-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-024-10554-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-024-10554-5.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},{"id":"pmid:39494321","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39494321","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical software engineering","raw_type":null},{"id":"pmh:oai:ebuah.uah.es:10017/67565","is_oa":true,"landing_page_url":"http://hdl.handle.net/10017/67565","pdf_url":"https://ebuah.uah.es/dspace/bitstream/10017/67565/3/Effect_Eberlein_EmpSoftEngin_2025.pdf","source":{"id":"https://openalex.org/S7407055200","display_name":"e_Buah","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pubmedcentral.nih.gov:11527945","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11527945","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11527945/pdf/10664_2024_Article_10554.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Empir Softw Eng","raw_type":"Text"},{"id":"pmh:tle:b84c51de-6e73-4331-9505-71a58b3cc6bd:afee126f-04b2-41a9-a6dd-b29b7c6c20ab:1","is_oa":true,"landing_page_url":"https://radar.brookes.ac.uk/radar/items/b84c51de-6e73-4331-9505-71a58b3cc6bd/1/","pdf_url":"https://radar.brookes.ac.uk/radar/file/b84c51de-6e73-4331-9505-71a58b3cc6bd/1/s10664-024-10554-5.pdf","source":{"id":"https://openalex.org/S4306400541","display_name":"Radar (Oxford Brookes University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124261462","host_organization_name":"Oxford Brookes University","host_organization_lineage":["https://openalex.org/I124261462"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The Effect of Data Complexity on Classifier Performance","raw_type":"journal article"}],"best_oa_location":{"id":"doi:10.1007/s10664-024-10554-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-024-10554-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-024-10554-5.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/15"}],"awards":[{"id":"https://openalex.org/G1650125035","display_name":null,"funder_award_id":"B-I00","funder_id":"https://openalex.org/F4320322930","funder_display_name":"Ministerio de Ciencia e Innovaci\u00f3n"},{"id":"https://openalex.org/G365285158","display_name":null,"funder_award_id":"871342","funder_id":"https://openalex.org/F4320338335","funder_display_name":"H2020 European Research Council"},{"id":"https://openalex.org/G4878411931","display_name":null,"funder_award_id":"PID2021-125645OB-I00","funder_id":"https://openalex.org/F4320322930","funder_display_name":"Ministerio de Ciencia e Innovaci\u00f3n"}],"funders":[{"id":"https://openalex.org/F4320311738","display_name":"Oxford Brookes University","ror":"https://ror.org/04v2twj65"},{"id":"https://openalex.org/F4320322930","display_name":"Ministerio de Ciencia e Innovaci\u00f3n","ror":"https://ror.org/034900433"},{"id":"https://openalex.org/F4320338335","display_name":"H2020 European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403946933.pdf","grobid_xml":"https://content.openalex.org/works/W4403946933.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W43871266","https://openalex.org/W105402513","https://openalex.org/W1840338487","https://openalex.org/W1941659294","https://openalex.org/W1975040830","https://openalex.org/W1975756116","https://openalex.org/W1988844742","https://openalex.org/W1990165991","https://openalex.org/W1999785511","https://openalex.org/W2003984511","https://openalex.org/W2004710424","https://openalex.org/W2011565604","https://openalex.org/W2020458104","https://openalex.org/W2022477494","https://openalex.org/W2074549602","https://openalex.org/W2078772598","https://openalex.org/W2094764356","https://openalex.org/W2094947835","https://openalex.org/W2101728371","https://openalex.org/W2104471998","https://openalex.org/W2110183025","https://openalex.org/W2122379760","https://openalex.org/W2124474517","https://openalex.org/W2125877832","https://openalex.org/W2143637886","https://openalex.org/W2151666086","https://openalex.org/W2154941560","https://openalex.org/W2158864412","https://openalex.org/W2161407365","https://openalex.org/W2167348934","https://openalex.org/W2523268328","https://openalex.org/W2766521509","https://openalex.org/W2783324346","https://openalex.org/W2795013017","https://openalex.org/W2805001156","https://openalex.org/W2902777483","https://openalex.org/W2911964244","https://openalex.org/W2973136425","https://openalex.org/W3034060521","https://openalex.org/W3096328115","https://openalex.org/W3141989311","https://openalex.org/W3166282707","https://openalex.org/W3189331705","https://openalex.org/W4254962918","https://openalex.org/W4298630190","https://openalex.org/W4312106785","https://openalex.org/W4382397550","https://openalex.org/W4392157755"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"The":[0],"research":[1],"area":[2],"of":[3,59,71,79,95,109,134,165],"Software":[4],"Defect":[5],"Prediction":[6],"(SDP)":[7],"is":[8,14,53,74,160],"both":[9],"extensive":[10],"and":[11,13,25,99,125,136,144,149,155,178],"popular,":[12],"often":[15],"treated":[16],"as":[17],"a":[18,54,93],"classification":[19,61],"problem.":[20],"Improvements":[21],"in":[22,46,56,64,175,192],"classification,":[23],"pre-processing":[24],"tuning":[26],"techniques,":[27],"(together":[28],"with":[29,104],"many":[30],"factors":[31],"which":[32],"can":[33,184],"influence":[34],"model":[35],"performance)":[36],"have":[37],"encouraged":[38],"this":[39,67,130],"trend.":[40],"However,":[41],"no":[42],"matter":[43],"the":[44,57,60,69,77,89,105,115,141,147,150,156,163],"effort":[45],"these":[47],"areas,":[48],"it":[49],"seems":[50],"that":[51,170,179],"there":[52],"ceiling":[55],"performance":[58,73,108,151],"models":[62],"used":[63],"SDP.":[65],"In":[66,129],"paper,":[68],"issue":[70],"classifier":[72],"analysed":[75,161],"from":[76,162],"perspective":[78,164],"data":[80,83,166,181],"complexity.":[81,167],"Specifically,":[82],"complexity":[84,182],"metrics":[85,152,183],"are":[86,138,153],"calculated":[87],"using":[88],"Unified":[90,157],"Bug":[91,158],"Dataset,":[92],"collection":[94],"well-known":[96],"SDP":[97],"datasets,":[98],"then":[100],"checked":[101],"for":[102,140],"correlation":[103],"defect":[106],"prediction":[107],"machine":[110],"learning":[111],"classifiers":[112,116,148,172,189],"(in":[113],"particular,":[114],"C5.0,":[117],"Naive":[118],"Bayes,":[119],"Artificial":[120],"Neural":[121],"Networks,":[122],"Random":[123],"Forests,":[124],"Support":[126],"Vector":[127],"Machines).":[128],"work,":[131],"different":[132],"domains":[133],"competence":[135],"incompetence":[137],"identified":[139],"classifiers.":[142],"Similarities":[143],"differences":[145],"between":[146],"found":[154,169],"Dataset":[159],"We":[168],"certain":[171,176,188],"work":[173],"best":[174],"situations":[177],"all":[180],"be":[185],"problematic,":[186],"although":[187],"did":[190],"excel":[191],"some":[193],"situations.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
