{"id":"https://openalex.org/W7125610441","doi":"https://doi.org/10.1007/s10994-025-06966-z","title":"Weakly Supervised Classification with Pre-Trained Models: A Robust Fine-Tuning Approach","display_name":"Weakly Supervised Classification with Pre-Trained Models: A Robust Fine-Tuning Approach","publication_year":2026,"publication_date":"2026-01-23","ids":{"openalex":"https://openalex.org/W7125610441","doi":"https://doi.org/10.1007/s10994-025-06966-z"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-025-06966-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06966-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06966-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06966-z.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123739355","display_name":"Ming Li","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ming Li","raw_affiliation_strings":["Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123765957","display_name":"Wei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]},{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Center for Advanced Intelligence Project, RIKEN, Chuo-ku, Tokyo, 103-0027, Japan","Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan"],"affiliations":[{"raw_affiliation_string":"Center for Advanced Intelligence Project, RIKEN, Chuo-ku, Tokyo, 103-0027, Japan","institution_ids":["https://openalex.org/I4210126580"]},{"raw_affiliation_string":"Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101484364","display_name":"Shaojun Ma","orcid":"https://orcid.org/0009-0004-4212-4383"},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]},{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masashi Sugiyama","raw_affiliation_strings":["Center for Advanced Intelligence Project, RIKEN, Chuo-ku, Tokyo, 103-0027, Japan","Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan"],"affiliations":[{"raw_affiliation_string":"Center for Advanced Intelligence Project, RIKEN, Chuo-ku, Tokyo, 103-0027, Japan","institution_ids":["https://openalex.org/I4210126580"]},{"raw_affiliation_string":"Graduate School of Frontier Sciences, The University of Tokyo, Kashiwa-shi, Chiba, 277-8561, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5123765957"],"corresponding_institution_ids":["https://openalex.org/I4210126580","https://openalex.org/I74801974"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28207098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"115","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.2694999873638153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.2694999873638153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1412000060081482,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.12919999659061432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.8361999988555908},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5993000268936157},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.45969998836517334},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4422000050544739},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.388700008392334},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.37400001287460327},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.3382999897003174},{"id":"https://openalex.org/keywords/linear-classifier","display_name":"Linear classifier","score":0.3357999920845032},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.31439998745918274}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.8361999988555908},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7103999853134155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6833000183105469},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6754000186920166},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5993000268936157},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.45969998836517334},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4422000050544739},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.37400001287460327},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C139532973","wikidata":"https://www.wikidata.org/wiki/Q2679259","display_name":"Linear classifier","level":3,"score":0.3357999920845032},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.31439998745918274},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C5465570","wikidata":"https://www.wikidata.org/wiki/Q5326898","display_name":"Early stopping","level":3,"score":0.3034000098705292},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.29280000925064087},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28790000081062317},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C2780150128","wikidata":"https://www.wikidata.org/wiki/Q21948731","display_name":"Extreme learning machine","level":3,"score":0.2718999981880188},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-025-06966-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06966-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06966-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-025-06966-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06966-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06966-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7125610441.pdf"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1977295328","https://openalex.org/W1982032418","https://openalex.org/W2095838485","https://openalex.org/W2117539524","https://openalex.org/W2620998106","https://openalex.org/W2964194231","https://openalex.org/W3091002423","https://openalex.org/W4226426325","https://openalex.org/W4236362309","https://openalex.org/W4287121781","https://openalex.org/W4312363706","https://openalex.org/W4312651322","https://openalex.org/W4385573131","https://openalex.org/W4386065763","https://openalex.org/W4401055721","https://openalex.org/W4402770330","https://openalex.org/W4402783842","https://openalex.org/W4403649759"],"related_works":[],"abstract_inverted_index":{"Weakly":[0],"supervised":[1,52],"classification":[2,47,123],"(WSC)":[3],"is":[4,40],"a":[5,14,30,36,81,116],"popular":[6],"machine":[7],"learning":[8],"paradigm":[9,69],"that":[10,39,92,125],"aims":[11],"to":[12,28,42,65,70,79,102],"train":[13],"classifier":[15],"using":[16,85,121],"incomplete,":[17],"inexact,":[18],"or":[19],"inaccurate":[20],"supervision.":[21],"Recently,":[22],"it":[23,62],"has":[24],"become":[25],"common":[26],"practice":[27],"use":[29,94],"general-purpose,":[31],"large,":[32],"pre-trained":[33,82],"model":[34,38,137],"as":[35],"foundation":[37],"fine-tuned":[41],"solve":[43],"complex,":[44],"challenging":[45],"downstream":[46,53],"problems.":[48,109],"However,":[49],"collecting":[50],"fully":[51],"data":[54],"can":[55],"be":[56],"costly":[57],"in":[58],"certain":[59],"domains.":[60],"Thus,":[61],"makes":[63],"sense":[64],"apply":[66],"the":[67,71,86,142,148,161],"WSC":[68,87,97],"fine-tuning":[72,119],"scenario.":[73],"In":[74],"this":[75],"paper,":[76],"we":[77,114,140],"attempt":[78],"fine-tune":[80],"vision":[83],"transformer":[84],"approach.":[88],"Our":[89],"experiments":[90],"show":[91],"naive":[93],"of":[95,163],"existing":[96],"losses":[98],"degrades":[99],"performance":[100],"due":[101],"severe":[103],"overfitting":[104],"exacerbation":[105],"and":[106,134,144],"feature":[107],"degeneration":[108],"To":[110],"address":[111],"these":[112],"problems,":[113],"propose":[115],"novel":[117],"robust":[118],"approach":[120,166],"dual":[122],"heads":[124],"are":[126],"trained":[127],"synergistically":[128],"by":[129],"alternately":[130],"distilling":[131],"reliable":[132],"supervision":[133],"performing":[135],"efficient":[136],"fine-tuning.":[138],"Theoretically,":[139],"prove":[141],"consistency":[143],"convergence":[145],"rate":[146],"for":[147],"proposed":[149,165],"risk":[150],"estimator.":[151],"Empirically,":[152],"extensive":[153],"experimental":[154],"results":[155],"on":[156],"diverse":[157],"benchmark":[158],"datasets":[159],"validate":[160],"effectiveness":[162],"our":[164],"against":[167],"state-of-the-art":[168],"approaches.":[169]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2026-01-25T00:00:00"}
