{"id":"https://openalex.org/W3193656970","doi":"https://doi.org/10.1109/access.2021.3106662","title":"An Empirical Investigation of Early Stopping Optimizations in Proximal Policy Optimization","display_name":"An Empirical Investigation of Early Stopping Optimizations in Proximal Policy Optimization","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3193656970","doi":"https://doi.org/10.1109/access.2021.3106662","mag":"3193656970"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3106662","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3106662","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09520424.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09520424.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081912903","display_name":"Rousslan Fernand Julien Dossa","orcid":"https://orcid.org/0000-0003-0572-692X"},"institutions":[{"id":"https://openalex.org/I65837984","display_name":"Kobe University","ror":"https://ror.org/03tgsfw79","country_code":"JP","type":"education","lineage":["https://openalex.org/I65837984"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Rousslan Fernand Julien Dossa","raw_affiliation_strings":["Graduate School of System Informatics, Kobe University, Hyogo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-0572-692X","affiliations":[{"raw_affiliation_string":"Graduate School of System Informatics, Kobe University, Hyogo, Japan","institution_ids":["https://openalex.org/I65837984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101442842","display_name":"Shengyi Huang","orcid":"https://orcid.org/0000-0003-4986-1365"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shengyi Huang","raw_affiliation_strings":["College of Computing & Informatics, Drexel University, Philadelphia, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computing & Informatics, Drexel University, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038686010","display_name":"Santiago Onta\u00f1\u00f3n","orcid":"https://orcid.org/0000-0002-9616-2981"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Santiago Ontanon","raw_affiliation_strings":["College of Computing & Informatics, Drexel University, Philadelphia, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computing & Informatics, Drexel University, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068667478","display_name":"Takashi Matsubara","orcid":"https://orcid.org/0000-0003-0642-4800"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takashi Matsubara","raw_affiliation_strings":["Graduate School of Engineering Science, Osaka University, Osaka, Japan"],"raw_orcid":"https://orcid.org/0000-0003-0642-4800","affiliations":[{"raw_affiliation_string":"Graduate School of Engineering Science, Osaka University, Osaka, Japan","institution_ids":["https://openalex.org/I98285908"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081912903"],"corresponding_institution_ids":["https://openalex.org/I65837984"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.8198,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.87901416,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"9","issue":null,"first_page":"117981","last_page":"117992"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6897139549255371}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6897139549255371}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3106662","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3106662","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09520424.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:bcdbb9c7e9f241aea5725e923918ef61","is_oa":true,"landing_page_url":"https://doaj.org/article/bcdbb9c7e9f241aea5725e923918ef61","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 117981-117992 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3106662","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3106662","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09520424.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1716091338","display_name":null,"funder_award_id":"JPMJMI20B8","funder_id":"https://openalex.org/F4320338243","funder_display_name":"JST-Mirai Program"},{"id":"https://openalex.org/G7800833597","display_name":null,"funder_award_id":"JPMJMI20B8","funder_id":"https://openalex.org/F4320334789","funder_display_name":"Japan Science and Technology Agency"}],"funders":[{"id":"https://openalex.org/F4320320912","display_name":"Ministry of Education, Culture, Sports, Science and Technology","ror":"https://ror.org/048rj2z13"},{"id":"https://openalex.org/F4320334789","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19"},{"id":"https://openalex.org/F4320338243","display_name":"JST-Mirai Program","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3193656970.pdf","grobid_xml":"https://content.openalex.org/works/W3193656970.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1191599655","https://openalex.org/W1522301498","https://openalex.org/W1575592356","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1965555277","https://openalex.org/W2121863487","https://openalex.org/W2155027007","https://openalex.org/W2173248099","https://openalex.org/W2736601468","https://openalex.org/W2749928749","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2914261249","https://openalex.org/W2963864421","https://openalex.org/W2963923407","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2995894173","https://openalex.org/W4298857966","https://openalex.org/W6627932998","https://openalex.org/W6631190155","https://openalex.org/W6634413486","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6683204974","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6741002519","https://openalex.org/W6743802245","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6758978475","https://openalex.org/W6772196467"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Code-level":[0],"optimizations,":[1],"which":[2,82],"are":[3,131,146],"low-level":[4],"optimization":[5,62,80],"techniques":[6],"used":[7,132],"in":[8,24,70,77,133],"the":[9,42,57,71,90,97,103,119,148,155,185],"implementation":[10],"of":[11,27,44,59,122,143,150,157,188],"algorithms,":[12],"have":[13],"generally":[14],"been":[15],"considered":[16],"as":[17,47,64,86,200],"tangential":[18],"and":[19,106,124,175],"often":[20],"do":[21],"not":[22,76],"appear":[23],"published":[25],"pseudo-code":[26],"Reinforcement":[28],"Learning":[29],"(RL)":[30],"algorithms.":[31],"However,":[32],"recent":[33],"studies":[34],"suggest":[35],"these":[36],"optimizations":[37,173,197],"to":[38,41,85,117,154,204],"be":[39],"critical":[40],"performance":[43,149],"algorithms":[45],"such":[46,61,180],"Proximal":[48],"Policy":[49],"Optimization":[50],"(PPO).":[51],"In":[52],"this":[53],"paper,":[54],"we":[55,83,114],"investigate":[56],"effect":[58],"one":[60],"known":[63],"\u201cearly":[65],"stopping\u201d":[66],"implemented":[67],"for":[68],"PPO":[69,151],"popular":[72],"openai/spinningup":[73],"library":[74],"but":[75],"openai/baselines.":[78],"This":[79],"technique,":[81],"refer":[84],"KLE-Stop,":[87],"can":[88],"stop":[89],"policy":[91,105,108],"update":[92,158,189],"within":[93,191],"an":[94,192],"epoch":[95,161],"if":[96],"mean":[98],"Kullback-Leibler":[99],"(KL)":[100],"Divergence":[101],"between":[102],"target":[104],"current":[107],"becomes":[109],"too":[110],"high.":[111],"More":[112],"specifically,":[113],"conduct":[115],"experiments":[116,145],"examine":[118],"empirical":[120],"importance":[121],"KLE-Stop":[123],"its":[125],"conservative":[126],"variant":[127],"KLE-Rollback":[128],"when":[129],"they":[130],"conjunction":[134],"with":[135],"other":[136],"common":[137],"code-level":[138],"optimizations.":[139],"The":[140],"main":[141],"findings":[142],"our":[144],"1)":[147],"is":[152],"sensitive":[153],"number":[156,187],"iterations":[159,190],"per":[160],"(":[162],"<inline-formula":[163,207],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[164,178,208],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[165,209],"<tex-math":[166,210],"notation=\"LaTeX\">$K$":[167,211],"</tex-math></inline-formula>":[168,212],"),":[169],"2)":[170],"Early":[171,195],"stopping":[172,196],"(KLE-Stop":[174],"KLE-Rollback)":[176],"<italic":[177],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">mitigate</i>":[179],"sensitivity":[181],"by":[182],"dynamically":[183],"adjusting":[184],"actual":[186],"epoch,":[193],"3)":[194],"could":[198],"serve":[199],"a":[201],"convenient":[202],"alternative":[203],"tuning":[205],"on":[206],".":[213]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
