{"id":"https://openalex.org/W7138431587","doi":"https://doi.org/10.1609/aaai.v40i30.39689","title":"LPPG-RL: Lexicographically Projected Policy Gradient Reinforcement Learning with Subproblem Exploration","display_name":"LPPG-RL: Lexicographically Projected Policy Gradient Reinforcement Learning with Subproblem Exploration","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138431587","doi":"https://doi.org/10.1609/aaai.v40i30.39689"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i30.39689","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i30.39689","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39689/43650","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39689/43650","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020341921","display_name":"Ruiyu Qiu","orcid":"https://orcid.org/0009-0004-2458-049X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruiyu Qiu","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704745","display_name":"Rui Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Wang","raw_affiliation_strings":["Ningbo University"],"affiliations":[{"raw_affiliation_string":"Ningbo University","institution_ids":["https://openalex.org/I109935558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013267252","display_name":"Guanghui Yang","orcid":"https://orcid.org/0000-0002-4396-0074"},"institutions":[{"id":"https://openalex.org/I3018263800","display_name":"Huzhou University","ror":"https://ror.org/04mvpxy20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3018263800"]},{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanghui Yang","raw_affiliation_strings":["Zhejiang University\nHuzhou Institute of Industrial Control Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University\nHuzhou Institute of Industrial Control Technology","institution_ids":["https://openalex.org/I3018263800","https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722839","display_name":"Xiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129678435","display_name":"Zhijiang Shao","orcid":null},"institutions":[{"id":"https://openalex.org/I3018263800","display_name":"Huzhou University","ror":"https://ror.org/04mvpxy20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3018263800"]},{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijiang Shao","raw_affiliation_strings":["Zhejiang University\nHuzhou Institute of Industrial Control Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University\nHuzhou Institute of Industrial Control Technology","institution_ids":["https://openalex.org/I3018263800","https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5020341921"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.64453961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"30","first_page":"25009","last_page":"25017"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8312000036239624,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8312000036239624,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.0364999994635582,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.034699998795986176,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lexicographical-order","display_name":"Lexicographical order","score":0.781499981880188},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7174000144004822},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6759999990463257},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6711000204086304},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5367000102996826},{"id":"https://openalex.org/keywords/gradient-method","display_name":"Gradient method","score":0.45820000767707825},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.37529999017715454}],"concepts":[{"id":"https://openalex.org/C159254197","wikidata":"https://www.wikidata.org/wiki/Q1144915","display_name":"Lexicographical order","level":2,"score":0.781499981880188},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7174000144004822},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6759999990463257},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6711000204086304},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6446999907493591},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.60589998960495},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5367000102996826},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.45820000767707825},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C47431972","wikidata":"https://www.wikidata.org/wiki/Q3045948","display_name":"ELECTRE","level":3,"score":0.3361000120639801},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.32249999046325684},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2872999906539917},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27709999680519104},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25999999046325684}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i30.39689","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i30.39689","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39689/43650","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i30.39689","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i30.39689","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39689/43650","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138431587.pdf","grobid_xml":"https://content.openalex.org/works/W7138431587.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Lexicographic":[0,54],"multi-objective":[1],"problems,":[2],"which":[3,101],"consist":[4],"of":[5,20,195],"multiple":[6,33],"conflicting":[7],"subtasks":[8],"with":[9,76,117],"explicit":[10],"priorities,":[11],"are":[12,80],"common":[13],"in":[14,24,122,186],"real-world":[15],"applications.":[16],"Despite":[17],"the":[18,127,193],"advantages":[19],"Reinforcement":[21],"Learning":[22],"(RL)":[23],"single":[25],"tasks,":[26],"extending":[27],"conventional":[28],"RL":[29,41,44,56,95],"methods":[30,46,58,69],"to":[31,62,82,106,142,149,159],"prioritized":[32],"objectives":[34],"remains":[35],"challenging.":[36],"In":[37,152],"particular,":[38],"traditional":[39],"Safe":[40],"and":[42,134,165,174],"Multi-Objective":[43,55],"(MORL)":[45],"have":[47,59],"difficulty":[48],"enforcing":[49],"priority":[50],"orderings":[51],"efficiently.":[52],"Therefore,":[53],"(LMORL)":[57],"been":[60],"developed":[61],"address":[63],"these":[64,87],"challenges.":[65],"However,":[66],"existing":[67,201],"LMORL":[68,99,204],"either":[70],"rely":[71],"on":[72,179],"heuristic":[73],"threshold":[74],"tuning":[75],"prior":[77],"knowledge":[78],"or":[79],"restricted":[81],"discrete":[83],"domains.":[84],"To":[85],"overcome":[86],"limitations,":[88],"we":[89,191],"propose":[90],"Lexicographically":[91],"Projected":[92],"Policy":[93],"Gradient":[94],"(LPPG-RL),":[96],"a":[97,176,187],"novel":[98],"framework":[100],"leverages":[102],"sequential":[103],"gradient":[104,120,161],"projections":[105],"identify":[107],"feasible":[108],"policy":[109,119,180],"update":[110],"directions,":[111],"thereby":[112],"enabling":[113],"LPPG-RL":[114,125,154],"broadly":[115],"compatible":[116],"all":[118],"algorithms":[121],"continuous":[123,203],"spaces.":[124],"reformulates":[126],"projection":[128,137],"step":[129],"as":[130],"an":[131],"optimization":[132],"problem,":[133],"utilizes":[135],"Dykstra's":[136],"rather":[138],"than":[139],"generic":[140],"solvers":[141],"deliver":[143],"great":[144],"speedups,":[145],"especially":[146],"for":[147,172],"small-":[148],"medium-scale":[150],"instances.":[151],"addition,":[153],"introduces":[155],"Subproblem":[156],"Exploration":[157],"(SE)":[158],"prevent":[160],"vanishing,":[162],"accelerate":[163],"convergence":[164,173],"enhance":[166],"stability.":[167],"We":[168],"provide":[169],"theoretical":[170],"guarantees":[171],"establish":[175],"lower":[177],"bound":[178],"improvement.":[181],"Finally,":[182],"through":[183],"extensive":[184],"experiments":[185],"2D":[188],"navigation":[189],"environment,":[190],"demonstrate":[192],"effectiveness":[194],"LPPG-RL,":[196],"showing":[197],"that":[198],"it":[199],"outperforms":[200],"state-of-the-art":[202],"methods.":[205]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
