{"id":"https://openalex.org/W4399344331","doi":"https://doi.org/10.1145/3626183.3659967","title":"Efficient Parallel Reinforcement Learning Framework Using the Reactor Model","display_name":"Efficient Parallel Reinforcement Learning Framework Using the Reactor Model","publication_year":2024,"publication_date":"2024-06-04","ids":{"openalex":"https://openalex.org/W4399344331","doi":"https://doi.org/10.1145/3626183.3659967"},"language":"en","primary_location":{"id":"doi:10.1145/3626183.3659967","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626183.3659967","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626183.3659967?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3626183.3659967?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103168045","display_name":"Jacky Kwok","orcid":"https://orcid.org/0009-0007-1482-2768"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jacky Kwok","raw_affiliation_strings":["UC Berkeley, Berkeley, USA"],"raw_orcid":"https://orcid.org/0009-0007-1482-2768","affiliations":[{"raw_affiliation_string":"UC Berkeley, Berkeley, USA","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019424497","display_name":"Marten Lohstroh","orcid":"https://orcid.org/0000-0001-8833-4117"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marten Lohstroh","raw_affiliation_strings":["UC Berkeley, Berkeley, USA"],"raw_orcid":"https://orcid.org/0000-0001-8833-4117","affiliations":[{"raw_affiliation_string":"UC Berkeley, Berkeley, USA","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009831760","display_name":"Edward A. Lee","orcid":"https://orcid.org/0000-0002-5663-0584"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward A. Lee","raw_affiliation_strings":["UC Berkeley, Berkeley, USA"],"raw_orcid":"https://orcid.org/0000-0002-5663-0584","affiliations":[{"raw_affiliation_string":"UC Berkeley, Berkeley, USA","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103168045"],"corresponding_institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":0.2003,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.47766364,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"41","last_page":"51"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8290631771087646},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7222318649291992},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41437017917633057},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.323628306388855}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8290631771087646},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7222318649291992},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41437017917633057},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.323628306388855}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626183.3659967","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626183.3659967","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626183.3659967?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3626183.3659967","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626183.3659967","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626183.3659967?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM Symposium on Parallelism in Algorithms and Architectures","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399344331.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1464569014","https://openalex.org/W1591459546","https://openalex.org/W2159542925","https://openalex.org/W2746553466","https://openalex.org/W2824027552","https://openalex.org/W2968526727","https://openalex.org/W3009928773","https://openalex.org/W3081168214","https://openalex.org/W3094567822","https://openalex.org/W3119696993","https://openalex.org/W3135499934","https://openalex.org/W3160830781","https://openalex.org/W3214226605","https://openalex.org/W4295312788","https://openalex.org/W4296611641","https://openalex.org/W4300655963","https://openalex.org/W4321150118","https://openalex.org/W4375851997","https://openalex.org/W4385282682","https://openalex.org/W4386254894"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109","https://openalex.org/W4362501864","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Parallel":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"frameworks":[4],"are":[5,45],"essential":[6],"for":[7,16,99,107,148],"mapping":[8],"RL":[9,53,149,189],"workloads":[10],"to":[11,85,95,143,153],"multiple":[12],"computational":[13,28],"resources,":[14],"allowing":[15],"faster":[17],"generation":[18],"of":[19,22,34,83,180],"samples,":[20],"estimation":[21],"values,":[23],"and":[24,37,59,104,112,135,164,172,186],"policy":[25],"improvement.":[26],"These":[27],"paradigms":[29],"require":[30],"a":[31,64,73,81,87,121,137,156],"seamless":[32],"integration":[33],"training,":[35],"serving,":[36],"simulation":[38,167],"workloads.":[39],"Existing":[40],"frameworks,":[41],"such":[42,101],"as":[43,102],"Ray,":[44],"not":[46],"managing":[47],"this":[48,68],"orchestration":[49],"efficiently,":[50],"especially":[51],"in":[52,133,169],"tasks":[54],"that":[55,140],"demand":[56],"intensive":[57],"input/output":[58],"synchronization":[60],"between":[61],"actors":[62,84],"on":[63,125,155],"single":[65],"node.":[66],"In":[67,151],"study,":[69],"we":[70],"have":[71,86],"proposed":[72],"solution":[74],"implementing":[75],"the":[76,93,126,176],"reactor":[77,127],"model,":[78,128],"which":[79],"enforces":[80],"set":[82],"fixed":[88],"communication":[89],"pattern.":[90],"This":[91],"allows":[92,141],"scheduler":[94],"eliminate":[96],"work":[97],"needed":[98],"synchronization,":[100],"acquiring":[103],"releasing":[105],"locks":[106],"each":[108],"actor":[109],"or":[110],"sending":[111],"processing":[113],"coordination-related":[114],"messages.":[115],"Our":[116],"framework,":[117],"Lingua":[118],"Franca":[119],"(LF),":[120],"coordination":[122],"language":[123],"based":[124],"also":[129],"supports":[130],"true":[131],"parallelism":[132],"Python":[134],"provides":[136],"unified":[138],"interface":[139],"users":[142],"automatically":[144],"generate":[145],"dataflow":[146],"graphs":[147],"tasks.":[150],"comparison":[152],"Ray":[154],"single-node":[157],"multi-core":[158],"compute":[159],"platform,":[160],"LF":[161],"achieves":[162],"1.21x":[163],"11.62x":[165],"higher":[166],"throughput":[168],"OpenAI":[170],"Gym":[171],"Atari":[173],"environments,":[174],"reduces":[175],"average":[177],"training":[178],"time":[179],"synchronized":[181],"parallel":[182],"Q-learning":[183],"by":[184,191],"31.2%,":[185],"accelerates":[187],"multi-agent":[188],"inference":[190],"5.12x.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
