{"id":"https://openalex.org/W4226336064","doi":"https://doi.org/10.1007/s10994-024-06652-6","title":"On the usefulness of the fit-on-test view on evaluating calibration of classifiers","display_name":"On the usefulness of the fit-on-test view on evaluating calibration of classifiers","publication_year":2025,"publication_date":"2025-02-24","ids":{"openalex":"https://openalex.org/W4226336064","doi":"https://doi.org/10.1007/s10994-024-06652-6"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-024-06652-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06652-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06652-6.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06652-6.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035768484","display_name":"Markus K\u00e4ngsepp","orcid":"https://orcid.org/0000-0002-1916-4350"},"institutions":[{"id":"https://openalex.org/I56085075","display_name":"University of Tartu","ror":"https://ror.org/03z77qz90","country_code":"EE","type":"education","lineage":["https://openalex.org/I56085075"]}],"countries":["EE"],"is_corresponding":true,"raw_author_name":"Markus K\u00e4ngsepp","raw_affiliation_strings":["Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia"],"raw_orcid":"https://orcid.org/0000-0002-1916-4350","affiliations":[{"raw_affiliation_string":"Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia","institution_ids":["https://openalex.org/I56085075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034780508","display_name":"Kaspar Valk","orcid":null},"institutions":[{"id":"https://openalex.org/I56085075","display_name":"University of Tartu","ror":"https://ror.org/03z77qz90","country_code":"EE","type":"education","lineage":["https://openalex.org/I56085075"]}],"countries":["EE"],"is_corresponding":false,"raw_author_name":"Kaspar Valk","raw_affiliation_strings":["Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia","institution_ids":["https://openalex.org/I56085075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087318731","display_name":"Meelis Kull","orcid":"https://orcid.org/0000-0001-9257-595X"},"institutions":[{"id":"https://openalex.org/I56085075","display_name":"University of Tartu","ror":"https://ror.org/03z77qz90","country_code":"EE","type":"education","lineage":["https://openalex.org/I56085075"]}],"countries":["EE"],"is_corresponding":false,"raw_author_name":"Meelis Kull","raw_affiliation_strings":["Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Computer Science, University of Tartu, Narva mnt, 51009, Tartu, Tartumaa, Estonia","institution_ids":["https://openalex.org/I56085075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035768484"],"corresponding_institution_ids":["https://openalex.org/I56085075"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00025192,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"114","issue":"4","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7351999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.7351999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.06449999660253525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.01860000006854534,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.756631076335907},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6247061491012573},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5742138624191284},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5586646795272827},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4566896855831146},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44962722063064575},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40316545963287354},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3475051522254944},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3049274981021881},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2980819344520569}],"concepts":[{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.756631076335907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6247061491012573},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5742138624191284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5586646795272827},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4566896855831146},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44962722063064575},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40316545963287354},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3475051522254944},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3049274981021881},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2980819344520569},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-024-06652-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06652-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06652-6.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-024-06652-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06652-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06652-6.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1653152820","display_name":"Contextual uncertainty and representation learning in machine perception","funder_award_id":"PRG1604","funder_id":"https://openalex.org/F4320321090","funder_display_name":"Eesti Teadusagentuur"}],"funders":[{"id":"https://openalex.org/F4320321090","display_name":"Eesti Teadusagentuur","ror":"https://ror.org/00jjeja18"},{"id":"https://openalex.org/F4320338080","display_name":"European Social Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226336064.pdf"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1213593253","https://openalex.org/W2012942264","https://openalex.org/W2073241381","https://openalex.org/W2089953742","https://openalex.org/W2098824882","https://openalex.org/W2099174412","https://openalex.org/W2099972513","https://openalex.org/W2194775991","https://openalex.org/W2254249950","https://openalex.org/W2548695521","https://openalex.org/W2725875209","https://openalex.org/W2917792182","https://openalex.org/W2963446712","https://openalex.org/W2970404282","https://openalex.org/W2971118045","https://openalex.org/W3011253634","https://openalex.org/W3012254281","https://openalex.org/W3023457794","https://openalex.org/W3118608800","https://openalex.org/W4306706453","https://openalex.org/W4379958559","https://openalex.org/W6713132643","https://openalex.org/W6739651123","https://openalex.org/W6761586423","https://openalex.org/W6767915748","https://openalex.org/W6779655649","https://openalex.org/W6787117189"],"related_works":["https://openalex.org/W36490","https://openalex.org/W65220","https://openalex.org/W728297","https://openalex.org/W206613","https://openalex.org/W1086253","https://openalex.org/W13076802","https://openalex.org/W17636424","https://openalex.org/W7872592","https://openalex.org/W9488309","https://openalex.org/W14944530"],"abstract_inverted_index":{"Abstract":[0],"Calibrated":[1],"uncertainty":[2,26],"estimates":[3],"are":[4],"essential":[5],"for":[6],"classifiers":[7],"used":[8],"in":[9,45,90,139,176],"safety-critical":[10],"applications.":[11],"If":[12],"a":[13,20,65,110,159,167],"classifier":[14],"is":[15,19,42,109,150,157],"uncalibrated,":[16],"then":[17],"there":[18],"unique":[21],"way":[22],"to":[23,34,117,166,186],"calibrate":[24],"its":[25],"using":[27],"the":[28,38,71,82,105,125,151,172,193,198],"idealistic":[29],"true":[30,39],"calibration":[31,40,54,62,76,79,97,106,130,154,217,238],"map":[32,41,218],"corresponding":[33],"this":[35,118],"classifier.":[36],"Although":[37],"typically":[43],"unknown":[44],"practice,":[46],"it":[47],"can":[48,184],"be":[49],"estimated":[50],"with":[51,178,197,210],"many":[52],"post-hoc":[53,75,129,235],"methods":[55,77],"which":[56,149,229],"fit":[57],"some":[58],"family":[59],"of":[60,85,104,127,141,170,174,207,215,228],"potential":[61],"functions":[63],"on":[64,87,99,181],"validation":[66],"dataset.":[67],"This":[68,122,162],"paper":[69],"examines":[70],"connection":[72],"between":[73],"such":[74],"and":[78,114,189,195,221,223,237],"evaluation.":[80],"Despite":[81],"negative":[83],"connotations":[84],"fitting":[86,96],"test":[88,100,182],"data":[89,101,183],"machine":[91],"learning,":[92],"we":[93,115,191],"claim":[94],"that":[95,146],"maps":[98],"as":[102,120,132,234],"part":[103],"evaluation":[107,134,142,155],"process":[108],"method":[111,131,169],"worth":[112],"considering,":[113],"refer":[116],"view":[119,123],"fit-on-test.":[121],"enables":[124],"usage":[126],"any":[128],"an":[133,225],"measure,":[135],"unlocking":[136],"missed":[137],"opportunities":[138],"development":[140,214],"methods.":[143],"We":[144],"prove":[145],"even":[147],"ECE,":[148],"most":[152],"common":[153],"method,":[156],"actually":[158],"fit-on-test":[160,199],"measure.":[161],"observation":[163],"leads":[164],"us":[165],"new":[168,216],"tuning":[171],"number":[173],"bins":[175],"ECE":[177],"cross-validation.":[179],"Fitting":[180],"lead":[185],"test-time":[187],"overfitting,":[188],"therefore,":[190],"discuss":[192],"limitations":[194],"concerns":[196],"view.":[200],"Our":[201],"contributions":[202],"also":[203],"include:":[204],"(1)":[205],"enhancement":[206],"reliability":[208],"diagrams":[209],"diagonal":[211],"filling;":[212],"(2)":[213],"families":[219,230],"PL":[220],"PL3;":[222],"(3)":[224],"experimental":[226],"study":[227],"perform":[231],"strongly":[232],"both":[233],"calibrators":[236],"evaluators.":[239]},"counts_by_year":[],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2022-05-05T00:00:00"}
