{"id":"https://openalex.org/W4416183401","doi":"https://doi.org/10.1109/sibgrapi67909.2025.11223402","title":"Enhancing Distilled Datasets Via Natural Data Mixing","display_name":"Enhancing Distilled Datasets Via Natural Data Mixing","publication_year":2025,"publication_date":"2025-09-30","ids":{"openalex":"https://openalex.org/W4416183401","doi":"https://doi.org/10.1109/sibgrapi67909.2025.11223402"},"language":null,"primary_location":{"id":"doi:10.1109/sibgrapi67909.2025.11223402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sibgrapi67909.2025.11223402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 38th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120446178","display_name":"Ian Pons","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ian Pons","raw_affiliation_strings":["Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046589480","display_name":"Guillaume Stern","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guilherme B. Stern","raw_affiliation_strings":["Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069264027","display_name":"Anna Helena Reali Costa","orcid":"https://orcid.org/0000-0001-7309-4528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anna H. Reali Costa","raw_affiliation_strings":["Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Artur Jordao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Artur Jordao","raw_affiliation_strings":["Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade de S&#x00E3;o Paulo,Escola Polit&#x00E9;cnica,S&#x00E3;o Paulo,Brazil","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29781121,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.21570000052452087,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.21570000052452087,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2029000073671341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.14100000262260437,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6708999872207642},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6437000036239624},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6182000041007996},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5978999733924866},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5784000158309937},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.43650001287460327},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4120999872684479},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.39480000734329224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6729999780654907},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6708999872207642},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6437000036239624},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6182000041007996},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5978999733924866},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5784000158309937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5202000141143799},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.43650001287460327},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4120999872684479},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40139999985694885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39739999175071716},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.39480000734329224},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3684000074863434},{"id":"https://openalex.org/C2779429693","wikidata":"https://www.wikidata.org/wiki/Q274959","display_name":"Distilled water","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2935999929904938},{"id":"https://openalex.org/C117765406","wikidata":"https://www.wikidata.org/wiki/Q5362437","display_name":"Generalization error","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C154030694","wikidata":"https://www.wikidata.org/wiki/Q1436074","display_name":"Fractionating column","level":3,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sibgrapi67909.2025.11223402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sibgrapi67909.2025.11223402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 38th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2072034955","display_name":null,"funder_award_id":"402734/2023-8,312360/2023-1","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G599566147","display_name":null,"funder_award_id":"2023/111630,2024/17684-4","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"}],"funders":[{"id":"https://openalex.org/F4320320997","display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo","ror":"https://ror.org/02ddkpn78"},{"id":"https://openalex.org/F4320321091","display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","ror":"https://ror.org/00x0ma614"},{"id":"https://openalex.org/F4320322025","display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","ror":"https://ror.org/03swz6y49"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2900595477","https://openalex.org/W2963417959","https://openalex.org/W3163842339","https://openalex.org/W3177096435","https://openalex.org/W3198953397","https://openalex.org/W4285285995","https://openalex.org/W4293846201","https://openalex.org/W4307110822","https://openalex.org/W4312412605","https://openalex.org/W4312761717","https://openalex.org/W4319300193","https://openalex.org/W4375839990","https://openalex.org/W4385764183","https://openalex.org/W4386072282","https://openalex.org/W4387415233","https://openalex.org/W4401726555","https://openalex.org/W4402917073","https://openalex.org/W4404545585","https://openalex.org/W4411012657","https://openalex.org/W4413146109"],"related_works":[],"abstract_inverted_index":{"Dataset":[0],"distillation":[1,206],"emerges":[2],"as":[3,46],"a":[4,12,17,25,29,79,88,111,158],"promising":[5],"technique":[6],"to":[7,35,57,77,93,107,142,177,202,230,238],"reduce":[8],"web-scale":[9],"datasets":[10],"into":[11,28],"compact":[13,30],"version":[14],"with":[15,136],"only":[16,137],"few":[18],"samples":[19,109],"per":[20,140,155],"class.":[21],"It":[22,251],"involves":[23],"distilling":[24],"large":[26],"dataset":[27,198,205,231],"synthetic":[31],"set":[32],"that":[33,120,130,188],"aims":[34],"preserve":[36],"representative":[37],"information":[38],"from":[39,104],"the":[40,60,146,216,219,224],"original":[41,64],"data,":[42],"offering":[43],"advantages":[44,228],"such":[45],"higher":[47],"training":[48,66,165,199],"efficiency":[49,225],"and":[50,101,226],"data":[51],"privacy.":[52],"However,":[53],"existing":[54],"techniques":[55],"fail":[56],"fully":[58],"capture":[59],"underlying":[61],"properties":[62],"of":[63,148,218,247],"(natural)":[65],"samples.":[67],"Hence,":[68],"learning":[69],"solely":[70],"on":[71,117,152],"distilled":[72,95,108,134,220],"images-the":[73],"standard":[74],"practice-leads":[75],"models":[76],"encounter":[78],"notable":[80],"generalization":[81,125],"gap.":[82],"In":[83],"this":[84],"work,":[85],"we":[86,128],"propose":[87],"simple":[89,112],"yet":[90],"effective":[91],"mechanism":[92],"enhance":[94],"images.":[96],"Our":[97],"method":[98,122,190,210,235],"transfers":[99],"powerful":[100],"discriminative":[102],"characteristics":[103],"natural":[105],"images":[106,139,154],"through":[110],"mixing":[113],"process.":[114],"Extensive":[115],"experiments":[116],"benchmarks":[118],"confirm":[119,187],"our":[121,131,189,209,234],"consistently":[123],"improves":[124,253],"accuracy.":[126],"Notably,":[127],"demonstrate":[129],"approach":[132],"enables":[133],"sets":[135],"10":[138],"class":[141],"match":[143],"or":[144],"exceed":[145],"performance":[147,174,243],"state-of-the-art":[149,204],"methods":[150],"trained":[151],"50":[153],"class,":[156],"representing":[157],"<tex":[159,178],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[160,179],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$5":[161],"\\times$</tex>":[162],"gain":[163],"in":[164],"efficiency.":[166],"On":[167],"challenging":[168],"ImageNet":[169],"subsets,":[170],"it":[171],"increases":[172],"predictive":[173,242],"by":[175,244],"up":[176],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{1":[180],"1.":[181],"5}$</tex>":[182],"percentage":[183,249],"points.":[184,250],"We":[185],"also":[186,252],"more":[191],"effectively":[192],"preserves":[193],"internal":[194],"representations":[195],"concerning":[196],"full":[197],"when":[200],"compared":[201],"plain":[203],"methods.":[207],"Crucially,":[208],"achieves":[211],"these":[212],"improvements":[213],"without":[214],"increasing":[215],"size":[217],"set,":[221],"thus":[222],"preserving":[223],"privacy":[227],"inherent":[229],"distillation.":[232],"Moreover,":[233],"enhances":[236],"robustness":[237],"common":[239],"corruptions,":[240],"improving":[241],"an":[245],"average":[246],"9.05":[248],"accuracy":[254],"against":[255],"moderate":[256],"adversarial":[257],"attacks.":[258],"Code":[259],"is":[260],"available":[261],"at:":[262],"github.com/IanPons/Enhancing-Distilled-Datasets":[263]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-11T00:00:00"}
