{"id":"https://openalex.org/W7154512264","doi":"https://doi.org/10.48550/arxiv.2604.12160","title":"PubSwap: Public-Data Off-Policy Coordination for Federated RLVR","display_name":"PubSwap: Public-Data Off-Policy Coordination for Federated RLVR","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7154512264","doi":"https://doi.org/10.48550/arxiv.2604.12160"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.12160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.12160","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124776486","display_name":"Anupam Nayak","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nayak, Anupam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124852459","display_name":"Baris Askin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Askin, Baris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133703814","display_name":"Muhammed Ustaomeroglu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ustaomeroglu, Muhammed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133717880","display_name":"Carlee Joe-Wong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joe-Wong, Carlee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133673932","display_name":"Gauri Joshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joshi, Gauri","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5124776486"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.19040000438690186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.19040000438690186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.14249999821186066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12790000438690186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.6614999771118164},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.588699996471405},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5644999742507935},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5514000058174133},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5346999764442444},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5056999921798706},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4699000120162964}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8044999837875366},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.6614999771118164},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5644999742507935},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5514000058174133},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5346999764442444},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5056999921798706},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4699000120162964},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.38909998536109924},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3522000014781952},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31529998779296875},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2809999883174896},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C15845906","wikidata":"https://www.wikidata.org/wiki/Q1172338","display_name":"Data exchange","level":2,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.12160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.12160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reasoning":[0],"post-training":[1],"with":[2,68,120,171],"reinforcement":[3],"learning":[4],"from":[5,138],"verifiable":[6],"rewards":[7],"(RLVR)":[8],"is":[9,28,38,42,87],"typically":[10],"studied":[11],"in":[12,35],"centralized":[13],"settings,":[14],"yet":[15],"many":[16,46],"realistic":[17],"applications":[18],"involve":[19],"decentralized":[20],"private":[21,111],"data":[22],"distributed":[23],"across":[24,97],"organizations.":[25],"Federated":[26],"training":[27,95,129],"a":[29,59,82,100,104,159],"natural":[30],"solution,":[31],"but":[32],"scaling":[33],"RLVR":[34,61],"this":[36],"regime":[37],"challenging:":[39],"full-model":[40],"synchronization":[41],"expensive,":[43],"and":[44,77,92,143,147,161],"performing":[45],"local":[47,66,133],"steps":[48,71],"can":[49],"cause":[50],"severe":[51],"client":[52],"drift":[53],"under":[54],"heterogeneous":[55],"data.":[56,112],"We":[57],"propose":[58],"federated":[60,165],"framework":[62],"that":[63],"combines":[64],"LoRA-based":[65],"adaptation":[67],"public-data-based":[69],"off-policy":[70],"to":[72,89,131],"improve":[73],"both":[74],"communication":[75,170],"efficiency":[76],"cross-client":[78,139],"coordination.":[79,140,174],"In":[80],"particular,":[81],"small":[83],"shared":[84],"public":[85],"dataset":[86],"used":[88],"periodically":[90],"exchange":[91],"reuse":[93],"response-level":[94],"signals":[96],"organizations,":[98],"providing":[99],"lightweight":[101],"anchor":[102],"toward":[103],"more":[105],"globally":[106,121],"aligned":[107],"objective":[108],"without":[109],"exposing":[110],"Our":[113,156],"method":[114,150],"selectively":[115],"replaces":[116],"locally":[117],"incorrect":[118],"responses":[119],"correct":[122],"ones":[123],"during":[124],"public-data":[125,173],"steps,":[126],"thereby":[127],"keeping":[128],"closer":[130],"the":[132],"policy":[134],"while":[135],"still":[136],"benefiting":[137],"Across":[141],"mathematical":[142],"medical":[144],"reasoning":[145,166],"benchmarks":[146],"models,":[148],"our":[149],"consistently":[151],"improves":[152],"over":[153],"standard":[154],"baselines.":[155],"results":[157],"highlight":[158],"simple":[160],"effective":[162],"recipe":[163],"for":[164],"post-training:":[167],"combining":[168],"low-rank":[169],"limited":[172]},"counts_by_year":[],"updated_date":"2026-04-16T06:09:31.884825","created_date":"2026-04-16T00:00:00"}
