{"id":"https://openalex.org/W7134236902","doi":"https://doi.org/10.1007/s10994-025-06970-3","title":"Performance Estimation in Binary Classification Using Calibrated Confidence","display_name":"Performance Estimation in Binary Classification Using Calibrated Confidence","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7134236902","doi":"https://doi.org/10.1007/s10994-025-06970-3"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-025-06970-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06970-3","pdf_url":null,"source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s10994-025-06970-3","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Juhani Kivim\u00e4ki","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Juhani Kivim\u00e4ki","raw_affiliation_strings":["University of Helsinki, PL 64, 00014, Helsinki, Finland"],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jakub Bia\u0142ek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jakub Bia\u0142ek","raw_affiliation_strings":["NannyML, Diestsesteenweg 50/20, Leuven, Belgium"],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wojtek Kuberski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wojtek Kuberski","raw_affiliation_strings":["NannyML, Diestsesteenweg 50/20, Leuven, Belgium"],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Jukka K. Nurminen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jukka K. Nurminen","raw_affiliation_strings":["University of Helsinki, PL 64, 00014, Helsinki, Finland"],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.97640745,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"115","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2842999994754791,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2842999994754791,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.13979999721050262,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.07840000092983246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6144000291824341},{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.5899999737739563},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.5810999870300293},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.578499972820282},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.5525000095367432},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5275999903678894},{"id":"https://openalex.org/keywords/confusion-matrix","display_name":"Confusion matrix","score":0.5224000215530396},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.48969998955726624},{"id":"https://openalex.org/keywords/confidence-interval","display_name":"Confidence interval","score":0.4068000018596649}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6324999928474426},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6144000291824341},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.5810999870300293},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.578499972820282},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.5525000095367432},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5275999903678894},{"id":"https://openalex.org/C138602881","wikidata":"https://www.wikidata.org/wiki/Q2709591","display_name":"Confusion matrix","level":2,"score":0.5224000215530396},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4869000017642975},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4756999909877777},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37400001287460327},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.3686999976634979},{"id":"https://openalex.org/C2776743756","wikidata":"https://www.wikidata.org/wiki/Q5097921","display_name":"Safeguarding","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C122123141","wikidata":"https://www.wikidata.org/wiki/Q176623","display_name":"Random variable","level":2,"score":0.3325999975204468},{"id":"https://openalex.org/C2909755999","wikidata":"https://www.wikidata.org/wiki/Q4751126","display_name":"Low Confidence","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C41426520","wikidata":"https://www.wikidata.org/wiki/Q1192065","display_name":"Point estimation","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.302700012922287},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2779190172","wikidata":"https://www.wikidata.org/wiki/Q4913888","display_name":"Binary data","level":3,"score":0.2632000148296356},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.26010000705718994},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.257099986076355},{"id":"https://openalex.org/C2776292839","wikidata":"https://www.wikidata.org/wiki/Q5179217","display_name":"Coverage probability","level":3,"score":0.25380000472068787},{"id":"https://openalex.org/C56672385","wikidata":"https://www.wikidata.org/wiki/Q17157111","display_name":"Mixture distribution","level":3,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-025-06970-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06970-3","pdf_url":null,"source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-025-06970-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06970-3","pdf_url":null,"source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1965014932","https://openalex.org/W2012942264","https://openalex.org/W2028138594","https://openalex.org/W2585528949","https://openalex.org/W4214627684","https://openalex.org/W4226242428","https://openalex.org/W4385478229","https://openalex.org/W4404926997","https://openalex.org/W4406643990","https://openalex.org/W7133232556","https://openalex.org/W7133246522"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Model":[1],"monitoring":[2,24,47],"is":[3,176,197],"a":[4,114,181],"critical":[5],"component":[6],"of":[7,57,89,154,166],"the":[8,17,55,98,126,152,155,167,192],"machine":[9],"learning":[10],"lifecycle,":[11],"safeguarding":[12],"against":[13],"undetected":[14],"drops":[15],"in":[16,41,83],"model\u2019s":[18],"performance":[19,23,46,82],"after":[20],"deployment.":[21],"Traditionally,":[22],"has":[25,93],"required":[26],"access":[27,61],"to":[28,53,62,146,169,199],"ground":[29],"truth":[30],"labels,":[31],"which":[32],"are":[33,70],"not":[34],"always":[35],"readily":[36],"available.":[37],"This":[38],"can":[39,118,188],"result":[40],"unacceptable":[42],"latency":[43],"or":[44],"render":[45],"altogether":[48],"impossible.":[49],"Recently,":[50],"methods":[51],"designed":[52],"estimate":[54,119,170],"accuracy":[56],"classifier":[58],"models":[59],"without":[60],"labels":[63],"have":[64],"shown":[65,198],"promising":[66],"results.":[67],"However,":[68],"there":[69],"various":[71],"other":[72],"metrics":[73,92,134],"that":[74,117,202],"might":[75],"be":[76,189],"more":[77],"suitable":[78],"for":[79],"assessing":[80],"model":[81,168],"many":[84],"cases.":[85],"Until":[86],"now,":[87],"none":[88],"these":[90],"important":[91],"received":[94],"similar":[95],"interest":[96],"from":[97,135,191],"scientific":[99],"community.":[100],"In":[101,129],"this":[102,106,136],"work,":[103],"we":[104,131],"address":[105],"gap":[107],"by":[108],"presenting":[109],"Confidence-based":[110],"Performance":[111],"estimation":[112],"(CBPE),":[113],"novel":[115],"method":[116],"any":[120],"binary":[121],"classification":[122],"metric":[123,175],"defined":[124],"using":[125],"confusion":[127,156,194],"matrix.":[128,195],"particular,":[130],"choose":[132],"four":[133],"large":[137],"family:":[138],"accuracy,":[139],"precision,":[140],"recall,":[141],"and":[142,161,208],"$$\\hbox":[143],"{F}_1$$":[144],",":[145],"demonstrate":[147],"our":[148],"method.":[149],"CBPE":[150,196],"treats":[151],"elements":[153],"matrix":[157],"as":[158,180],"random":[159,182],"variables":[160],"leverages":[162],"calibrated":[163],"confidence":[164,210],"scores":[165],"their":[171],"distributions.":[172],"The":[173],"desired":[174],"then":[177],"also":[178],"treated":[179],"variable,":[183],"whose":[184],"full":[185],"probability":[186],"distribution":[187],"derived":[190],"estimated":[193],"produce":[200],"estimates":[201],"come":[203],"with":[204],"strong":[205],"theoretical":[206],"guarantees":[207],"valid":[209],"intervals.":[211]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2026-02-27T00:00:00"}
