{"id":"https://openalex.org/W4401443407","doi":"https://doi.org/10.1109/access.2024.3441242","title":"Massively High-Throughput Reinforcement Learning for Classic Control on GPUs","display_name":"Massively High-Throughput Reinforcement Learning for Classic Control on GPUs","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4401443407","doi":"https://doi.org/10.1109/access.2024.3441242"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3441242","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3441242","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3441242","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106368422","display_name":"Xuan Sha","orcid":"https://orcid.org/0009-0006-3301-4523"},"institutions":[{"id":"https://openalex.org/I4403386554","display_name":"Southeast University Chengxian College","ror":"https://ror.org/03mfb3630","country_code":null,"type":"education","lineage":["https://openalex.org/I4403386554"]},{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Sha","raw_affiliation_strings":["School of Civil and Transportation Engineering, Southeast University Chengxian College, Nanjing, Jiangsu, China"],"raw_orcid":"https://orcid.org/0009-0006-3301-4523","affiliations":[{"raw_affiliation_string":"School of Civil and Transportation Engineering, Southeast University Chengxian College, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I4403386554","https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":null,"display_name":"Tian Lan","orcid":"https://orcid.org/0009-0004-6857-6804"},"institutions":[{"id":"https://openalex.org/I4210155268","display_name":"Salesforce (United States)","ror":"https://ror.org/057315g56","country_code":"US","type":"company","lineage":["https://openalex.org/I4210155268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tian Lan","raw_affiliation_strings":["Salesforce AI Research, Palo Alto, CA, USA"],"raw_orcid":"https://orcid.org/0009-0004-6857-6804","affiliations":[{"raw_affiliation_string":"Salesforce AI Research, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I4210155268"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.4869,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71009928,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"12","issue":null,"first_page":"117737","last_page":"117744"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8111414909362793},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.7253175377845764},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.7009220123291016},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6079625487327576},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5734262466430664},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.36322087049484253},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2647170424461365},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15227559208869934}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8111414909362793},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.7253175377845764},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.7009220123291016},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6079625487327576},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5734262466430664},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.36322087049484253},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2647170424461365},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15227559208869934},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3441242","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3441242","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7451d76d71be42dbad253128b45da8ba","is_oa":true,"landing_page_url":"https://doaj.org/article/7451d76d71be42dbad253128b45da8ba","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 117737-117744 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3441242","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3441242","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4110719389","display_name":null,"funder_award_id":"2022SJYB0707","funder_id":"https://openalex.org/F4320327500","funder_display_name":"Jiangsu University of Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320327500","display_name":"Jiangsu University of Science and Technology","ror":"https://ror.org/00tyjp878"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2122410182","https://openalex.org/W2245493112","https://openalex.org/W2575705757","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2982316857","https://openalex.org/W3005057496","https://openalex.org/W3021135564","https://openalex.org/W3109546547","https://openalex.org/W3126321819","https://openalex.org/W3153278392","https://openalex.org/W3172279798","https://openalex.org/W3196741622","https://openalex.org/W3196764258","https://openalex.org/W3203406184","https://openalex.org/W3204553045","https://openalex.org/W3213974477","https://openalex.org/W4228997196","https://openalex.org/W4282008490","https://openalex.org/W4287113927","https://openalex.org/W4296611641","https://openalex.org/W4297797010","https://openalex.org/W4389911035","https://openalex.org/W4390343183","https://openalex.org/W6683153233","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6748638692","https://openalex.org/W6773373605","https://openalex.org/W6779380009","https://openalex.org/W6780559895","https://openalex.org/W6793574630","https://openalex.org/W6800004206","https://openalex.org/W6800919033"],"related_works":["https://openalex.org/W2023839151","https://openalex.org/W1774183074","https://openalex.org/W2057488824","https://openalex.org/W2334687145","https://openalex.org/W2178011914","https://openalex.org/W4235962491","https://openalex.org/W2061778832","https://openalex.org/W1513001507","https://openalex.org/W2257153718","https://openalex.org/W3134702077"],"abstract_inverted_index":{"This":[0],"study":[1],"presents":[2],"a":[3,74,86],"novel":[4],"massively":[5],"high-throughput":[6],"reinforcement":[7],"learning":[8],"(RL)":[9],"framework":[10],"specifically":[11],"designed":[12],"for":[13,25],"addressing":[14],"classic":[15,46],"control":[16,47],"problems,":[17,48],"leveraging":[18],"our":[19,37,113],"proposed":[20],"architecture":[21],"and":[22,52,58],"algorithms":[23],"optimized":[24],"efficient":[26],"concurrent":[27,64],"computations":[28],"on":[29],"GPUs.":[30],"Our":[31],"research":[32],"demonstrates":[33],"the":[34,82,94],"effectiveness":[35],"of":[36,77,84,96],"methods":[38],"in":[39,112],"efficiently":[40],"training":[41,102],"RL":[42,71],"agents":[43],"across":[44],"various":[45],"encompassing":[49],"both":[50],"discrete":[51],"continuous":[53],"domains,":[54],"while":[55],"achieving":[56],"rapid":[57],"stable":[59],"performance":[60],"up":[61],"to":[62],"10K":[63],"environment":[65],"instances.":[66],"Furthermore,":[67],"we":[68,91],"observe":[69],"that":[70,93],"exploration":[72],"with":[73],"large":[75],"number":[76],"parallel":[78],"instances":[79],"significantly":[80],"enhances":[81],"stability":[83,95],"updating":[85],"shared":[87],"model.":[88],"For":[89],"instance,":[90],"show":[92],"Deep":[97],"Deterministic":[98],"Policy":[99],"Gradient":[100],"(DDPG)":[101],"can":[103],"be":[104],"achieved":[105],"without":[106],"requiring":[107],"experience":[108],"replay,":[109],"as":[110],"evidenced":[111],"study.":[114]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-07-22T07:51:19.307946","created_date":"2025-10-10T00:00:00"}