{"id":"https://openalex.org/W7160177113","doi":"https://doi.org/10.48550/arxiv.2605.00185","title":"Fair Dataset Distillation via Cross-Group Barycenter Alignment","display_name":"Fair Dataset Distillation via Cross-Group Barycenter Alignment","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7160177113","doi":"https://doi.org/10.48550/arxiv.2605.00185"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.00185","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00185","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.00185","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094087306","display_name":"Mohammad Hossein Moslemi","orcid":"https://orcid.org/0009-0002-0278-4665"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moslemi, Mohammad Hossein","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5097083956","display_name":"Nima Hosseini Dashtbayaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dashtbayaz, Nima Hosseini","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135232656","display_name":"Zhimin Mei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mei, Zhimin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135121391","display_name":"Boyu Wang","orcid":"https://orcid.org/0009-0002-6864-3590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghaddar, Bissan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135230719","display_name":"Bissan Ghaddar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Boyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.8809000253677368,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.8809000253677368,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.020800000056624413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.019999999552965164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.83160001039505},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.6578999757766724},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6258999705314636},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.5593000054359436},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.31220000982284546},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.28780001401901245}],"concepts":[{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.83160001039505},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.6578999757766724},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6290000081062317},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6258999705314636},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.5593000054359436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.478300005197525},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4438000023365021},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44350001215934753},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3384999930858612},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C2778058735","wikidata":"https://www.wikidata.org/wiki/Q4692253","display_name":"Aggregate data","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.00185","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00185","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.00185","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00185","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dataset":[0],"Distillation":[1],"aims":[2],"to":[3,32,65],"compress":[4],"a":[5,9,113],"large":[6],"dataset":[7,162],"into":[8],"small":[10],"synthetic":[11],"one":[12],"while":[13],"maintaining":[14],"predictive":[15,26,87,118],"performance.":[16],"We":[17,95],"show":[18,135,154],"that":[19,120,136,155],"as":[20,111],"different":[21],"demographic":[22],"groups":[23],"exhibit":[24],"distinct":[25],"patterns,":[27],"the":[28,99,109,117],"distillation":[29,149],"process":[30],"struggles":[31],"simultaneously":[33],"preserve":[34],"informative":[35],"signals":[36],"for":[37,61],"all":[38,125],"subgroups,":[39,63],"regardless":[40],"of":[41,105,116],"whether":[42],"group":[43,77,137],"sizes":[44],"are":[45],"mildly":[46],"or":[47],"severely":[48],"imbalanced.":[49],"Consequently,":[50],"models":[51],"trained":[52],"on":[53],"distilled":[54],"data":[55],"can":[56,140],"experience":[57],"substantial":[58],"performance":[59],"drops":[60],"certain":[62],"leading":[64],"fairness":[66,138],"gaps.":[67],"Crucially,":[68],"these":[69,102],"gaps":[70],"do":[71],"not":[72],"disappear":[73],"by":[74,161],"merely":[75],"correcting":[76],"imbalance,":[78],"since":[79],"they":[80],"stem":[81],"from":[82,91],"fundamental":[83],"mismatches":[84],"in":[85],"subgroup":[86],"patterns":[88],"rather":[89],"than":[90],"sample-size":[92],"disparities":[93],"alone.":[94],"therefore":[96],"formally":[97],"analyze":[98],"interaction":[100],"between":[101],"two":[103],"sources":[104],"bias":[106,159],"and":[107,151],"cast":[108],"solution":[110],"identifying":[112],"group-imbalance-agnostic":[114],"barycenter":[115],"information":[119],"induces":[121],"similar":[122],"representations":[123],"across":[124],"subgroups.":[126],"By":[127],"distilling":[128],"toward":[129],"this":[130],"shared":[131],"aggregate":[132],"representation,":[133],"we":[134],"concerns":[139],"be":[141],"reduced.":[142],"Our":[143],"approach":[144],"is":[145,165],"compatible":[146],"with":[147],"existing":[148],"methods,":[150],"empirical":[152],"results":[153],"it":[156],"substantially":[157],"reduces":[158],"introduced":[160],"distillation.":[163],"Code":[164],"available":[166],"at":[167],"https://github.com/mhmoslemi/COBRA.":[168]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-05T00:00:00"}
