{"id":"https://openalex.org/W7117116891","doi":"https://doi.org/10.1109/tpami.2025.3647862","title":"Forget Me Not: Fighting Local Overfitting With Knowledge Fusion and Distillation","display_name":"Forget Me Not: Fighting Local Overfitting With Knowledge Fusion and Distillation","publication_year":2025,"publication_date":"2025-12-24","ids":{"openalex":"https://openalex.org/W7117116891","doi":"https://doi.org/10.1109/tpami.2025.3647862","pmid":"https://pubmed.ncbi.nlm.nih.gov/41442301"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3647862","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3647862","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121125469","display_name":"Uri Stern","orcid":null},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Uri Stern","raw_affiliation_strings":["School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel"],"raw_orcid":"https://orcid.org/0009-0003-5083-7001","affiliations":[{"raw_affiliation_string":"School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121201182","display_name":"Eli Corn","orcid":null},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Eli Corn","raw_affiliation_strings":["School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel"],"raw_orcid":"https://orcid.org/0009-0006-9766-8892","affiliations":[{"raw_affiliation_string":"School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000206190","display_name":"Daphna Weinshall","orcid":null},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Daphna Weinshall","raw_affiliation_strings":["School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel"],"raw_orcid":"https://orcid.org/0000-0001-8893-8586","affiliations":[{"raw_affiliation_string":"School of Computer Science, Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5121125469"],"corresponding_institution_ids":["https://openalex.org/I197251160"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79884284,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"48","issue":"5","first_page":"5004","last_page":"5015"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.567300021648407,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.567300021648407,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.15809999406337738,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.10700000077486038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.9835000038146973},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.75},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49549999833106995},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.47780001163482666},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4555000066757202},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.45159998536109924},{"id":"https://openalex.org/keywords/dropout","display_name":"Dropout (neural networks)","score":0.38100001215934753},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3634999990463257}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.9835000038146973},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7857000231742859},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.75},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7314000129699707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7285000085830688},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49549999833106995},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.47780001163482666},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4555000066757202},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C2776145597","wikidata":"https://www.wikidata.org/wiki/Q25339462","display_name":"Dropout (neural networks)","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3634999990463257},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.25}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3647862","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3647862","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41442301","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41442301","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320033","display_name":"Gatsby Charitable Foundation","ror":"https://ror.org/0290hax27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Overfitting":[0],"in":[1,33,44,175],"deep":[2,65],"neural":[3],"networks":[4],"occurs":[5],"less":[6],"frequently":[7],"than":[8],"expected.":[9],"This":[10],"is":[11,30,99],"a":[12,56,76,113,122,144,201],"puzzling":[13],"observation,":[14],"as":[15],"theory":[16],"predicts":[17],"that":[18,59,89,116],"greater":[19],"model":[20,124,146,195],"capacity":[21],"should":[22],"eventually":[23],"lead":[24],"to":[25,80,102,125],"overfitting":[26,38,91],"-":[27,183,190],"yet":[28],"this":[29,52],"rarely":[31],"seen":[32],"practice.":[34],"But":[35],"what":[36,71],"if":[37],"does":[39],"occur,":[40],"not":[41],"globally,":[42],"but":[43],"specific":[45],"sub-regions":[46],"of":[47,64,83,121,147,171,178],"the":[48,61,84,103,118,148,169,176,193],"data":[49],"space?":[50],"In":[51],"work,":[53],"we":[54,72,111],"introduce":[55,112],"novel":[57],"score":[58],"measures":[60],"forgetting":[62],"rate":[63],"models":[66],"on":[67,108],"validation":[68],"data,":[69],"capturing":[70],"term":[73],"local":[74,90],"overfitting:":[75],"performance":[77,153],"degradation":[78],"confined":[79],"certain":[81],"regions":[82],"input":[85],"space.":[86],"We":[87],"demonstrate":[88],"can":[92],"arise":[93],"even":[94],"without":[95,154],"conventional":[96],"overfitting,":[97],"and":[98,127,138,165,196,207],"closely":[100],"linked":[101],"double":[104],"descent":[105],"phenomenon.":[106],"Building":[107],"these":[109],"insights,":[110],"two-stage":[114],"approach":[115],"leverages":[117],"training":[119,166,206],"history":[120],"single":[123,145],"recover":[126],"retain":[128],"forgotten":[129],"knowledge:":[130],"first,":[131],"by":[132,140,187],"aggregating":[133],"checkpoints":[134],"into":[135,143],"an":[136],"ensemble,":[137],"then":[139],"distilling":[141],"it":[142],"original":[149,194],"size,":[150],"thus":[151],"enhancing":[152],"added":[155],"inference":[156,208],"cost.":[157],"Extensive":[158],"experiments":[159],"across":[160],"multiple":[161],"datasets,":[162],"modern":[163],"architectures,":[164],"regimes":[167],"validate":[168],"effectiveness":[170],"our":[172,181],"approach.":[173],"Notably,":[174],"presence":[177],"label":[179],"noise,":[180],"method":[182],"Knowledge":[184,188],"Fusion":[185],"followed":[186],"Distillation":[189],"outperforms":[191],"both":[192],"independently":[197],"trained":[198],"ensembles,":[199],"achieving":[200],"rare":[202],"win-win":[203],"scenario:":[204],"reduced":[205],"complexity.":[209]},"counts_by_year":[],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-12-24T00:00:00"}
