{"id":"https://openalex.org/W7161305957","doi":"https://doi.org/10.48550/arxiv.2605.13936","title":"Towards the Next Frontier of LLMs, Training on Private Data: A Cross-Domain Benchmark for Federated Fine-Tuning","display_name":"Towards the Next Frontier of LLMs, Training on Private Data: A Cross-Domain Benchmark for Federated Fine-Tuning","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161305957","doi":"https://doi.org/10.48550/arxiv.2605.13936"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136203395","display_name":"Daniel M. Jimenez-Gutierrez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jimenez-Gutierrez, Daniel M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088702266","display_name":"Enrique Zuazua","orcid":"https://orcid.org/0000-0002-1377-0958"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuazua, Enrique","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136228051","display_name":"Georgios Kellaris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kellaris, Georgios","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136200780","display_name":"Joaquin del Rio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"del Rio, Joaquin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136226327","display_name":"Oleksii Sliusarenko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sliusarenko, Oleksii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120319798","display_name":"Xabi Uribe-Etxebarria","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Uribe-Etxebarria, Xabi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2994000017642975,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2994000017642975,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.06120000034570694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.051600001752376556,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6934999823570251},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.5860000252723694},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.5537999868392944},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5047000050544739},{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.48100000619888306},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.43799999356269836},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.4259999990463257},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.3714999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7102000117301941},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6934999823570251},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.5860000252723694},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.5537999868392944},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5047000050544739},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.48429998755455017},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.48100000619888306},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.43799999356269836},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.4259999990463257},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3714999854564667},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3343999981880188},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C141513077","wikidata":"https://www.wikidata.org/wiki/Q378542","display_name":"Independent and identically distributed random variables","level":3,"score":0.32350000739097595},{"id":"https://openalex.org/C121426985","wikidata":"https://www.wikidata.org/wiki/Q591763","display_name":"Private sector","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C147859227","wikidata":"https://www.wikidata.org/wiki/Q294217","display_name":"Public sector","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.5069059133529663}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"recent":[1],"success":[2],"of":[3,28],"large":[4],"language":[5],"models":[6],"(LLMs)":[7],"has":[8],"been":[9],"largely":[10],"driven":[11],"by":[12,87],"vast":[13],"public":[14,25],"datasets.":[15],"However,":[16],"the":[17,29,143],"next":[18],"frontier":[19],"for":[20,132,243],"LLM":[21,133,157],"development":[22],"lies":[23],"beyond":[24],"data.":[26,161],"Much":[27],"world's":[30],"most":[31],"valuable":[32],"information":[33],"is":[34],"private,":[35],"especially":[36],"in":[37,106,170],"highly":[38],"regulated":[39],"sectors":[40],"such":[41],"as":[42,239],"healthcare":[43,171],"and":[44,69,85,90,99,113,128,172,179,185,195,217,228],"finance,":[45,173],"where":[46,246],"data":[47,56,75,109,131,139,204,247],"include":[48],"patient":[49],"histories":[50],"or":[51],"customer":[52],"communications.":[53],"Unlocking":[54],"this":[55,118,164],"could":[57],"represent":[58],"a":[59,122,155,167,223,240],"major":[60],"leap":[61],"forward,":[62],"enabling":[63],"LLMs":[64,245],"with":[65,232],"deeper":[66],"domain":[67],"expertise":[68],"stronger":[70],"real-world":[71],"utility.":[72],"Yet,":[73],"these":[74],"cannot":[76,248],"be":[77,249],"shared":[78,156],"because":[79],"they":[80],"are":[81,96],"distributed":[82,101,129],"across":[83,104,138],"institutions":[84],"constrained":[86],"privacy,":[88],"regulatory,":[89],"organizational":[91],"barriers.":[92],"Moreover,":[93],"institutional":[94,130,203],"datasets":[95],"typically":[97],"non-independent":[98],"identically":[100],"(non-IID),":[102],"differing":[103],"sites":[105],"population":[107],"characteristics,":[108],"modalities,":[110],"documentation":[111],"patterns,":[112],"task-specific":[114],"label":[115],"distributions.":[116],"In":[117],"paper,":[119],"we":[120],"demonstrate":[121],"practical":[123],"approach":[124,165,242],"to":[125,152,214],"unlocking":[126],"private":[127,160],"adaptation":[134],"through":[135,166],"federated":[136,210,237],"collaboration":[137],"silos.":[140],"Built":[141],"on":[142],"Sherpa.ai":[144],"Federated":[145],"Learning":[146],"platform,":[147],"our":[148],"framework":[149],"enables":[150],"nodes":[151],"jointly":[153],"fine-tune":[154],"without":[158],"exchanging":[159],"We":[162,187],"evaluate":[163],"cross-domain":[168],"benchmark":[169],"using":[174],"four":[175],"closed-ended":[176],"question":[177],"answering":[178],"classification":[180],"datasets:":[181],"MedQA,":[182],"MedMCQA,":[183],"FPB,":[184],"FiQA-SA.":[186],"compare":[188],"three":[189],"parameter-efficient":[190],"fine-tuning":[191,211],"(PEFT)":[192],"strategies-LoRA,":[193],"QLoRA,":[194],"IA3-across":[196],"pretrained":[197],"backbones":[198],"under":[199],"non-IID":[200],"settings":[201],"reflecting":[202],"heterogeneity.":[205],"Our":[206],"results":[207],"show":[208],"that":[209],"performs":[212],"close":[213],"centralized":[215],"training":[216],"outperforms":[218],"isolated":[219],"single-institution":[220],"learning.":[221],"From":[222],"Green":[224],"AI":[225],"perspective,":[226],"QLoRA":[227],"IA3":[229],"improve":[230],"efficiency":[231],"limited":[233],"accuracy":[234],"degradation,":[235],"supporting":[236],"PEFT":[238],"viable":[241],"adapting":[244],"shared.":[250]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
