{"id":"https://openalex.org/W4401507995","doi":"https://doi.org/10.1109/infocom52122.2024.10621140","title":"Federated Offline Policy Optimization with Dual Regularization","display_name":"Federated Offline Policy Optimization with Dual Regularization","publication_year":2024,"publication_date":"2024-05-20","ids":{"openalex":"https://openalex.org/W4401507995","doi":"https://doi.org/10.1109/infocom52122.2024.10621140"},"language":"en","primary_location":{"id":"doi:10.1109/infocom52122.2024.10621140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocom52122.2024.10621140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2024 - IEEE Conference on Computer Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113844201","display_name":"Sheng Yue","orcid":"https://orcid.org/0009-0005-4973-6136"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sheng Yue","raw_affiliation_strings":["BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036502769","display_name":"Zerui Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zerui Qin","raw_affiliation_strings":["BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114171382","display_name":"Xingyuan Hua","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingyuan Hua","raw_affiliation_strings":["Beijing Institute of Technology,School of Computer Science and Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037386169","display_name":"Yongheng Deng","orcid":"https://orcid.org/0000-0003-3010-3812"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongheng Deng","raw_affiliation_strings":["BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015419107","display_name":"Ju Ren","orcid":"https://orcid.org/0000-0003-2782-183X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ju Ren","raw_affiliation_strings":["BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"],"affiliations":[{"raw_affiliation_string":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113844201"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.7305,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71809675,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"811","last_page":"820"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6305316686630249},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.628130316734314},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.46442046761512756},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3445450961589813},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21935304999351501},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14082017540931702}],"concepts":[{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6305316686630249},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.628130316734314},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.46442046761512756},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3445450961589813},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21935304999351501},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14082017540931702},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/infocom52122.2024.10621140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocom52122.2024.10621140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2024 - IEEE Conference on Computer Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322392","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W2158782408","https://openalex.org/W2736601468","https://openalex.org/W2911450448","https://openalex.org/W2963864421","https://openalex.org/W2968937098","https://openalex.org/W2995022099","https://openalex.org/W3001141399","https://openalex.org/W3005294098","https://openalex.org/W3009175007","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3088084782","https://openalex.org/W3105018016","https://openalex.org/W3210635777","https://openalex.org/W4248540588","https://openalex.org/W4287272801","https://openalex.org/W4320190429","https://openalex.org/W4375869870","https://openalex.org/W4377562264","https://openalex.org/W4384027004","https://openalex.org/W6677067356","https://openalex.org/W6684921986","https://openalex.org/W6728757088","https://openalex.org/W6741002519","https://openalex.org/W6766011044","https://openalex.org/W6769116889","https://openalex.org/W6773268965","https://openalex.org/W6776438516","https://openalex.org/W6776601253","https://openalex.org/W6777656069","https://openalex.org/W6779265984","https://openalex.org/W6791402389","https://openalex.org/W6791413555","https://openalex.org/W6792049622","https://openalex.org/W6802550813","https://openalex.org/W6809956346","https://openalex.org/W6840065948","https://openalex.org/W6850148118","https://openalex.org/W6852106718"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Federated":[0],"Reinforcement":[1],"Learning":[2],"(FRL)":[3],"has":[4],"been":[5],"deemed":[6],"as":[7],"a":[8,55,71],"promising":[9],"solution":[10],"for":[11],"intelligent":[12],"decision-making":[13],"in":[14,44,108,141],"the":[15,31,90,95,103,114,117,126,149],"era":[16],"of":[17,20,116,153],"Artificial":[18],"Internet":[19],"Things.":[21],"However,":[22],"existing":[23],"FRL":[24],"approaches":[25],"often":[26],"entail":[27],"repeated":[28],"interactions":[29],"with":[30,102],"environment":[32],"during":[33],"local":[34,91],"updating,":[35],"which":[36,64],"can":[37,131],"be":[38],"prohibitively":[39],"expensive":[40],"or":[41],"even":[42],"infeasible":[43],"many":[45],"real-world":[46],"domains.":[47],"To":[48],"overcome":[49],"this":[50,52],"challenge,":[51],"paper":[53],"proposes":[54],"novel":[56],"offline":[57,109],"federated":[58],"policy":[59,73,93,139],"optimization":[60],"algorithm,":[61],"named":[62],"DRPO,":[63],"enables":[65],"distributed":[66],"agents":[67],"to":[68,99],"collaboratively":[69],"learn":[70],"decision":[72],"only":[74],"from":[75],"private":[76],"and":[77,94,136],"static":[78],"data":[79],"without":[80],"further":[81],"environmental":[82],"interactions.":[83],"DRPO":[84,130,154],"leverages":[85],"dual":[86,118],"regularization,":[87],"incorporating":[88],"both":[89],"behavioral":[92],"global":[96],"aggregated":[97],"policy,":[98],"judiciously":[100],"cope":[101],"intrinsic":[104],"two-tier":[105],"distributional":[106,134],"shifts":[107,135],"FRL.":[110],"Theoretical":[111],"analysis":[112],"characterizes":[113],"impact":[115],"regularization":[119],"on":[120],"performance,":[121],"demonstrating":[122],"that":[123],"by":[124],"achieving":[125],"right":[127],"balance":[128],"thereof,":[129],"effectively":[132],"counteract":[133],"ensure":[137],"strict":[138],"improvement":[140],"each":[142],"federative":[143],"learning":[144],"round.":[145],"Extensive":[146],"experiments":[147],"validate":[148],"significant":[150],"performance":[151],"gains":[152],"over":[155],"baseline":[156],"methods.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
