{"id":"https://openalex.org/W4391462862","doi":"https://doi.org/10.48550/arxiv.2401.17780","title":"A Policy Gradient Primal-Dual Algorithm for Constrained MDPs with Uniform PAC Guarantees","display_name":"A Policy Gradient Primal-Dual Algorithm for Constrained MDPs with Uniform PAC Guarantees","publication_year":2024,"publication_date":"2024-01-31","ids":{"openalex":"https://openalex.org/W4391462862","doi":"https://doi.org/10.48550/arxiv.2401.17780"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2401.17780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17780","pdf_url":"https://arxiv.org/pdf/2401.17780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2401.17780","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031877106","display_name":"Toshinori Kitamura","orcid":"https://orcid.org/0000-0002-2326-3140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kitamura, Toshinori","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070075141","display_name":"Tadashi Kozuno","orcid":"https://orcid.org/0000-0002-8820-1362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kozuno, Tadashi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083089288","display_name":"Masahiro Kato","orcid":"https://orcid.org/0000-0003-4050-023X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kato, Masahiro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065789639","display_name":"Yuki Ichihara","orcid":"https://orcid.org/0000-0002-0013-5373"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ichihara, Yuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091587258","display_name":"Soichiro Nishimori","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nishimori, Soichiro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056836344","display_name":"Akiyoshi Sannai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sannai, Akiyoshi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036537988","display_name":"Sho Sonoda","orcid":"https://orcid.org/0000-0001-7242-4740"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sonoda, Sho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003820265","display_name":"Wataru Kumagai","orcid":"https://orcid.org/0000-0002-3081-5951"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumagai, Wataru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074059447","display_name":"Yutaka Matsuo","orcid":"https://orcid.org/0000-0002-2070-4393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matsuo, Yutaka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9218000173568726,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9218000173568726,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.7530910968780518},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5850131511688232},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5554816126823425},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5052424073219299},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2897193431854248}],"concepts":[{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.7530910968780518},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5850131511688232},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5554816126823425},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5052424073219299},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2897193431854248},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2401.17780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17780","pdf_url":"https://arxiv.org/pdf/2401.17780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2401.17780","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2401.17780","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2401.17780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17780","pdf_url":"https://arxiv.org/pdf/2401.17780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6912369398","display_name":null,"funder_award_id":"JPMJPR2125","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G7615765131","display_name":null,"funder_award_id":"19H04071","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338111","display_name":"Precursory Research for Embryonic Science and Technology","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391462862.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"We":[0],"study":[1],"a":[2,49,100],"primal-dual":[3],"(PD)":[4],"reinforcement":[5],"learning":[6],"(RL)":[7],"algorithm":[8,54,84,105],"for":[9,28,74,85],"online":[10,87],"constrained":[11],"Markov":[12],"decision":[13],"processes":[14],"(CMDPs).":[15],"Despite":[16],"its":[17],"widespread":[18],"practical":[19],"use,":[20],"the":[21,81,86,93],"existing":[22],"theoretical":[23,94],"literature":[24],"on":[25],"PD-RL":[26],"algorithms":[27,112],"this":[29,45,79],"problem":[30],"only":[31],"provides":[32],"sublinear":[33,68],"regret":[34],"guarantees":[35],"and":[36,70,116],"fails":[37],"to":[38,41,65,92,107],"ensure":[39],"convergence":[40,64],"optimal":[42,66,108],"policies.":[43],"In":[44,90],"paper,":[46],"we":[47,96],"introduce":[48],"novel":[50],"policy":[51],"gradient":[52],"PD":[53],"with":[55],"uniform":[56],"probably":[57],"approximate":[58],"correctness":[59],"(Uniform-PAC)":[60],"guarantees,":[61,95],"simultaneously":[62],"ensuring":[63],"policies,":[67,109],"regret,":[69],"polynomial":[71],"sample":[72],"complexity":[73],"any":[75],"target":[76],"accuracy.":[77],"Notably,":[78],"represents":[80],"first":[82],"Uniform-PAC":[83],"CMDP":[88,102],"problem.":[89],"addition":[91],"empirically":[97],"demonstrate":[98],"in":[99],"simple":[101],"that":[103],"our":[104],"converges":[106],"while":[110],"baseline":[111],"exhibit":[113],"oscillatory":[114],"performance":[115],"constraint":[117],"violation.":[118]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
