{"id":"https://openalex.org/W4414360194","doi":"https://doi.org/10.24963/ijcai.2025/1227","title":"Confidence-based Estimators for Predictive Performance in Model Monitoring (Abstract Reprint)","display_name":"Confidence-based Estimators for Predictive Performance in Model Monitoring (Abstract Reprint)","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360194","doi":"https://doi.org/10.24963/ijcai.2025/1227"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/1227","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092580474","display_name":"Juhani Kivim\u00e4ki","orcid":"https://orcid.org/0000-0002-9673-9760"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Juhani Kivim\u00e4ki","raw_affiliation_strings":["University of Helsinki, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034505239","display_name":"Jakub Bia\u0142ek","orcid":"https://orcid.org/0000-0001-5850-6934"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jakub Bia\u0142ek","raw_affiliation_strings":["University of Helsinki, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042260991","display_name":"Jukka K. Nurminen","orcid":"https://orcid.org/0000-0001-5083-1927"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jukka K. Nurminen","raw_affiliation_strings":["NannyML"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NannyML","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080026850","display_name":"Wojtek Kuberski","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wojtek Kuberski","raw_affiliation_strings":["NannyML"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NannyML","institution_ids":["https://openalex.org/I4210122684"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5092580474"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28802411,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10953","last_page":"10953"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9088000059127808,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9088000059127808,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.7788000106811523},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.7767000198364258},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6789000034332275},{"id":"https://openalex.org/keywords/confidence-interval","display_name":"Confidence interval","score":0.5199000239372253},{"id":"https://openalex.org/keywords/covariate","display_name":"Covariate","score":0.49239999055862427},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.4648999869823456},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.45969998836517334}],"concepts":[{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.7788000106811523},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7767000198364258},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6789000034332275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5508000254631042},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.5199000239372253},{"id":"https://openalex.org/C119043178","wikidata":"https://www.wikidata.org/wiki/Q320723","display_name":"Covariate","level":2,"score":0.49239999055862427},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.45969998836517334},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40220001339912415},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.40149998664855957},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.35350000858306885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3370000123977661},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3206000030040741},{"id":"https://openalex.org/C194531419","wikidata":"https://www.wikidata.org/wiki/Q17104825","display_name":"Nuisance parameter","level":3,"score":0.310699999332428},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30959999561309814},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2924000024795532},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C140529851","wikidata":"https://www.wikidata.org/wiki/Q5160083","display_name":"Confidence and prediction bands","level":3,"score":0.2605000138282776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/1227","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"After":[0],"a":[1,63,94,120,139,213,236],"machine":[2],"learning":[3],"model":[4,95,109,130,186],"has":[5],"been":[6,102,156],"deployed":[7],"into":[8],"production,":[9],"its":[10,217],"predictive":[11,81,91],"performance":[12,82,92],"needs":[13],"to":[14,35,212,272,289],"be":[15,21,36,43,290],"monitored.":[16],"Ideally,":[17],"such":[18,75],"monitoring":[19,78,237],"can":[20],"carried":[22],"out":[23],"by":[24,166,194],"comparing":[25,229],"the":[26,38,68,79,90,133,147,151,168,191,200,255,258,267,280,284],"model\u2019s":[27,80],"predictions":[28],"against":[29,119,231],"ground":[30,39,56,97],"truth":[31,40,57,98],"labels.":[32],"For":[33],"this":[34,160,164],"possible,":[37],"labels":[41,58],"must":[42],"available":[44,60],"relatively":[45],"soon":[46],"after":[47,62],"inference.":[48],"However,":[49,144,279],"there":[50],"are":[51,59,116],"many":[52,277],"use":[53,218],"cases":[54],"where":[55],"only":[61],"significant":[64],"delay,":[65],"or":[66,111],"in":[67,219,235,276],"worst":[69],"case,":[70],"not":[71,155],"at":[72],"all.":[73],"In":[74,159],"cases,":[76],"directly":[77],"is":[83,99,179,270,287],"impossible.":[84],"Recently,":[85],"novel":[86],"methods":[87,107],"for":[88,138,199,251],"estimating":[89],"of":[93,105,135,142,150,185,257,283],"when":[96],"unavailable":[100],"have":[101,154],"developed.":[103],"Many":[104],"these":[106],"leverage":[108],"confidence":[110,136,197],"other":[112,274],"uncertainty":[113],"estimates":[114,129,201],"and":[115,171,182],"experimentally":[117],"compared":[118],"naive":[121],"baseline":[122],"method,":[123],"namely":[124],"Average":[125],"Confidence":[126],"(AC),":[127],"which":[128,249],"accuracy":[131],"as":[132],"average":[134],"scores":[137],"given":[140],"set":[141],"predictions.":[143],"until":[145],"now":[146],"theoretical":[148,224],"properties":[149],"AC":[152,169,192,207,230,268],"method":[153,170,193,269],"properly":[157],"explored.":[158],"paper,":[161],"we":[162],"bridge":[163],"gap":[165],"reviewing":[167],"show":[172,265],"that":[173,266],"under":[174,239],"certain":[175],"general":[176],"assumptions,":[177],"it":[178,202],"an":[180,209],"unbiased":[181],"consistent":[183],"estimator":[184,211],"accuracy.":[187],"We":[188,221,242],"also":[189],"augment":[190],"deriving":[195],"valid":[196],"intervals":[198],"produces.":[203],"These":[204],"contributions":[205],"elevate":[206],"from":[208],"ad-hoc":[210],"principled":[214],"one,":[215],"encouraging":[216],"practice.":[220],"complement":[222],"our":[223,244],"results":[225],"with":[226,262],"empirical":[227],"experiments,":[228],"more":[232],"complex":[233],"estimators":[234,275,286],"setting":[238],"covariate":[240],"shift.":[241,259],"conduct":[243],"experiments":[245,261],"using":[246],"synthetic":[247],"datasets,":[248],"allow":[250],"full":[252],"control":[253],"over":[254],"nature":[256],"Our":[260],"binary":[263],"classifiers":[264],"able":[271],"beat":[273],"cases.":[278],"comparative":[281],"quality":[282],"different":[285],"found":[288],"heavily":[291],"case-dependent.":[292]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
