{"id":"https://openalex.org/W4399310835","doi":"https://doi.org/10.1109/tg.2024.3408298","title":"Partial Advantage Estimator for Proximal Policy Optimization","display_name":"Partial Advantage Estimator for Proximal Policy Optimization","publication_year":2024,"publication_date":"2024-06-03","ids":{"openalex":"https://openalex.org/W4399310835","doi":"https://doi.org/10.1109/tg.2024.3408298"},"language":"en","primary_location":{"id":"doi:10.1109/tg.2024.3408298","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2024.3408298","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/98015/2/PAE_for_IEEE_TOG%20%282%29.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043672137","display_name":"Yizhao Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Yizhao Jin","raw_affiliation_strings":["Game AI group, Queen Mary University of London, London, U.K"],"raw_orcid":"https://orcid.org/0009-0006-8923-2562","affiliations":[{"raw_affiliation_string":"Game AI group, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027798845","display_name":"Xiulei Song","orcid":null},"institutions":[{"id":"https://openalex.org/I4210144436","display_name":"Shanghai Huayi Group (China)","ror":"https://ror.org/044f58834","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210144436"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiulei Song","raw_affiliation_strings":["AI Research Group, JumpW, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0003-6792-5788","affiliations":[{"raw_affiliation_string":"AI Research Group, JumpW, Shanghai, China","institution_ids":["https://openalex.org/I4210144436"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037886447","display_name":"Greg Slabaugh","orcid":"https://orcid.org/0000-0003-4060-5226"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gregory Slabaugh","raw_affiliation_strings":["Electrical Engineering and Computer Science, Queen Mary University of London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0003-4060-5226","affiliations":[{"raw_affiliation_string":"Electrical Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062380176","display_name":"Simon M. Lucas","orcid":"https://orcid.org/0000-0002-3180-7451"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon Lucas","raw_affiliation_strings":["Game AI group, Queen Mary University of London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0002-3180-7451","affiliations":[{"raw_affiliation_string":"Game AI group, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5043672137"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":2.3428,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.88502207,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"17","issue":"1","first_page":"158","last_page":"166"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11413","display_name":"Risk and Portfolio Optimization","score":0.09679999947547913,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11413","display_name":"Risk and Portfolio Optimization","score":0.09679999947547913,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13825","display_name":"Economic Policies and Impacts","score":0.09449999779462814,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10991","display_name":"Game Theory and Voting Systems","score":0.08669999986886978,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6466160416603088},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.458048015832901},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4087662100791931},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.34933459758758545},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17213687300682068}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6466160416603088},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.458048015832901},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4087662100791931},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.34933459758758545},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17213687300682068}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tg.2024.3408298","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2024.3408298","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/98015","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/98015","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/98015/2/PAE_for_IEEE_TOG%20%282%29.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/98015","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/98015","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/98015/2/PAE_for_IEEE_TOG%20%282%29.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399310835.pdf","grobid_xml":"https://content.openalex.org/works/W4399310835.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1586189497","https://openalex.org/W1706571876","https://openalex.org/W2119717200","https://openalex.org/W2951360122","https://openalex.org/W2973229164","https://openalex.org/W2978455699","https://openalex.org/W2982316857","https://openalex.org/W3041202696","https://openalex.org/W4327544661","https://openalex.org/W6627932998","https://openalex.org/W6630994837","https://openalex.org/W6638018090","https://openalex.org/W6679257226","https://openalex.org/W6683300800","https://openalex.org/W6684205842","https://openalex.org/W6685444567","https://openalex.org/W6692846177","https://openalex.org/W6747473740","https://openalex.org/W6747924173","https://openalex.org/W6748499953","https://openalex.org/W6748638692","https://openalex.org/W6751629939","https://openalex.org/W6766275931","https://openalex.org/W6778875248","https://openalex.org/W6799777419"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4391375266","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2072657027","https://openalex.org/W2600246793"],"abstract_inverted_index":{"This":[0,69,146],"paper":[1],"proposes":[2],"an":[3,72],"innovative":[4],"approach":[5],"to":[6,12],"the":[7,14,27,37,75,112,121,132,151,181],"Generalized":[8],"Advantage":[9],"Estimator":[10],"(GAE)":[11],"address":[13],"bias-variance":[15,45,76],"trade-off":[16],"in":[17,188,192,205],"truncated":[18,54,86],"roll-outs":[19,55],"during":[20],"reinforcement":[21,206],"learning.":[22,207],"In":[23],"typical":[24],"GAE":[25,109,148,183,187],"implementations,":[26],"k-step":[28],"advantage":[29,113],"is":[30,137],"estimated":[31],"using":[32],"a":[33,66,94,98,107,125,176,197],"lambda-weighted":[34],"average,":[35],"until":[36],"terminal":[38],"state.":[39],"While":[40],"this":[41,135],"method":[42],"provides":[43],"constant":[44],"properties":[46,77],"at":[47],"any":[48],"time":[49,83,90,117],"step,":[50],"it":[51],"often":[52],"necessitates":[53],"with":[56,180],"shorter":[57],"horizons":[58],"for":[59,79,100,115,124,139,200],"faster":[60],"learning":[61],"and":[62,142,159,171],"policy":[63,122,143,202],"updates":[64],"within":[65,85],"single":[67],"episode.":[68],"study":[70],"highlights":[71],"unexplored":[73],"issue:":[74],"differ":[78],"small":[80],"versus":[81],"considerable":[82],"steps":[84,91,118],"roll-outs.":[87],"Specifically,":[88],"smaller":[89],"may":[92],"have":[93],"significant":[95],"bias,":[96],"prompting":[97],"need":[99],"their":[101],"increase.":[102],"The":[103,173],"proposed":[104],"solution":[105],"involves":[106],"partial":[108,147,182],"update,":[110],"calculating":[111],"estimates":[114],"all":[116],"but":[119],"updating":[120],"only":[123],"specified":[126],"range.":[127],"To":[128],"prevent":[129],"data":[130,133,161],"wastage,":[131],"from":[134],"range":[136],"retained":[138],"further":[140],"processing":[141],"parameter":[144],"updates.":[145],"approach,":[149],"despite":[150],"increased":[152],"memory":[153],"requirements,":[154],"promises":[155],"enhanced":[156],"computation":[157],"speed":[158,191],"optimal":[160],"utilization.":[162],"Empirical":[163],"validation":[164],"was":[165],"conducted":[166],"on":[167],"four":[168],"MuJoCo":[169],"tasks":[170],"microRTS.":[172,193],"results":[174],"show":[175],"performance":[177],"improvement":[178],"trend":[179],"estimator,":[184],"outperforming":[185],"regular":[186],"task":[189],"completion":[190],"These":[194],"findings":[195],"offer":[196],"promising":[198],"direction":[199],"improving":[201],"update":[203],"efficiency":[204]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-10T00:00:00"}
