{"id":"https://openalex.org/W4392822291","doi":"https://doi.org/10.48550/arxiv.2403.08100","title":"Efficient Language Model Architectures for Differentially Private Federated Learning","display_name":"Efficient Language Model Architectures for Differentially Private Federated Learning","publication_year":2024,"publication_date":"2024-03-12","ids":{"openalex":"https://openalex.org/W4392822291","doi":"https://doi.org/10.48550/arxiv.2403.08100"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.08100","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.08100","pdf_url":"https://arxiv.org/pdf/2403.08100","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.08100","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075278288","display_name":"Jae Hun Ro","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ro, Jae Hun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057451920","display_name":"Srinadh Bhojanapalli","orcid":"https://orcid.org/0000-0002-4147-2106"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhojanapalli, Srinadh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101283231","display_name":"Zheng Xu","orcid":"https://orcid.org/0009-0006-9256-7937"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108019598","display_name":"Yanxiang Zhang","orcid":"https://orcid.org/0000-0002-9318-0324"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yanxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103890688","display_name":"Ananda Theertha Suresh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suresh, Ananda Theertha","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075278288"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9164999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6702380180358887},{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.5413285493850708},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3550705015659332},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3542234003543854},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3316792845726013}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6702380180358887},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.5413285493850708},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3550705015659332},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3542234003543854},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3316792845726013}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.08100","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.08100","pdf_url":"https://arxiv.org/pdf/2403.08100","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.08100","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.08100","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.08100","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.08100","pdf_url":"https://arxiv.org/pdf/2403.08100","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392822291.pdf","grobid_xml":"https://content.openalex.org/works/W4392822291.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4298221930","https://openalex.org/W2390279801","https://openalex.org/W2777914285","https://openalex.org/W2358668433","https://openalex.org/W4287823391","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3013363440","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Cross-device":[0],"federated":[1,151,181],"learning":[2,152,182],"(FL)":[3],"is":[4,26,164],"a":[5,9,93],"technique":[6],"that":[7,77,116,142],"trains":[8],"model":[10,119,131],"on":[11,32],"data":[12,21],"distributed":[13],"across":[14],"typically":[15],"millions":[16],"of":[17,49,66,153],"edge":[18],"devices":[19],"without":[20],"leaving":[22],"the":[23,27,105,111,127,143,160],"devices.":[24],"SGD":[25,84],"standard":[28,128],"client":[29,85],"optimizer":[30],"for":[31,39],"device":[33],"training":[34,48],"in":[35,46,110,132,135,150,180],"cross-device":[36,133],"FL,":[37],"favored":[38],"its":[40],"memory":[41],"and":[42,62,87,107,114,122],"computational":[43],"efficiency.":[44],"However,":[45],"centralized":[47],"neural":[50],"language":[51,71],"models,":[52],"adaptive":[53],"optimizers":[54,86],"are":[55],"preferred":[56],"as":[57],"they":[58,78],"offer":[59],"improved":[60,176],"stability":[61],"performance.":[63],"In":[64],"light":[65],"this,":[67],"we":[68,158],"ask":[69],"if":[70],"models":[72],"can":[73,79],"be":[74,80],"modified":[75],"such":[76],"efficiently":[81],"trained":[82],"with":[83,167,183],"answer":[88],"this":[89,117],"affirmatively.":[90],"We":[91,139],"propose":[92],"scale-invariant":[94],"Coupled":[95],"Input":[96],"Forget":[97],"Gate":[98],"(SI":[99],"CIFG)":[100],"recurrent":[101,112,130],"network":[102],"by":[103],"modifying":[104],"sigmoid":[106],"tanh":[108],"activations":[109],"cell":[113],"show":[115,141],"new":[118],"converges":[120],"faster":[121],"achieves":[123],"better":[124],"utility":[125,178],"than":[126],"CIFG":[129],"FL":[134],"large":[136],"scale":[137,145,161],"experiments.":[138],"further":[140],"proposed":[144],"invariant":[146,162],"modification":[147,163],"also":[148,165],"helps":[149],"larger":[154],"transformer":[155],"models.":[156],"Finally,":[157],"demonstrate":[159],"compatible":[166],"other":[168],"non-adaptive":[169],"algorithms.":[170],"Particularly,":[171],"our":[172],"results":[173],"suggest":[174],"an":[175],"privacy":[177],"trade-off":[179],"differential":[184],"privacy.":[185]},"counts_by_year":[],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2024-03-15T00:00:00"}
