{"id":"https://openalex.org/W4408522761","doi":"https://doi.org/10.1080/03081079.2025.2474517","title":"Single trajectory-based policy optimization for discrete-time stochastic systems","display_name":"Single trajectory-based policy optimization for discrete-time stochastic systems","publication_year":2025,"publication_date":"2025-03-17","ids":{"openalex":"https://openalex.org/W4408522761","doi":"https://doi.org/10.1080/03081079.2025.2474517"},"language":"en","primary_location":{"id":"doi:10.1080/03081079.2025.2474517","is_oa":false,"landing_page_url":"https://doi.org/10.1080/03081079.2025.2474517","pdf_url":null,"source":{"id":"https://openalex.org/S145553572","display_name":"International Journal of General Systems","issn_l":"0308-1079","issn":["0308-1079","1026-7492","1563-5104"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of General Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032771161","display_name":"Jing Lai","orcid":"https://orcid.org/0000-0002-9920-0066"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Lai","raw_affiliation_strings":["Hefei University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hefei University of Technology","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021953090","display_name":"Junlin Xiong","orcid":"https://orcid.org/0000-0002-0128-4960"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junlin Xiong","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5021953090"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02004023,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"54","issue":"8","first_page":"1044","last_page":"1070"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.7011418342590332},{"id":"https://openalex.org/keywords/discrete-time-and-continuous-time","display_name":"Discrete time and continuous time","score":0.6054086089134216},{"id":"https://openalex.org/keywords/trajectory-optimization","display_name":"Trajectory optimization","score":0.5413070321083069},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5391964912414551},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.487119197845459},{"id":"https://openalex.org/keywords/stochastic-optimization","display_name":"Stochastic optimization","score":0.44900259375572205},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.34482020139694214},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.22032639384269714},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09870710968971252}],"concepts":[{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.7011418342590332},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.6054086089134216},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.5413070321083069},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5391964912414551},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.487119197845459},{"id":"https://openalex.org/C194387892","wikidata":"https://www.wikidata.org/wiki/Q1747770","display_name":"Stochastic optimization","level":2,"score":0.44900259375572205},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.34482020139694214},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.22032639384269714},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09870710968971252},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/03081079.2025.2474517","is_oa":false,"landing_page_url":"https://doi.org/10.1080/03081079.2025.2474517","pdf_url":null,"source":{"id":"https://openalex.org/S145553572","display_name":"International Journal of General Systems","issn_l":"0308-1079","issn":["0308-1079","1026-7492","1563-5104"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of General Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8020654489","display_name":null,"funder_award_id":"62273320","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1979381096","https://openalex.org/W1984332158","https://openalex.org/W1998895316","https://openalex.org/W2024303516","https://openalex.org/W2098432798","https://openalex.org/W2137798267","https://openalex.org/W2145339207","https://openalex.org/W2155208704","https://openalex.org/W2336534989","https://openalex.org/W2526983455","https://openalex.org/W2752077342","https://openalex.org/W2776811469","https://openalex.org/W2804656147","https://openalex.org/W2963774238","https://openalex.org/W2970161765","https://openalex.org/W2979982637","https://openalex.org/W3035230405","https://openalex.org/W3098412154","https://openalex.org/W3118758689","https://openalex.org/W3152878473","https://openalex.org/W3177309855","https://openalex.org/W3209135762","https://openalex.org/W4214717370","https://openalex.org/W4221162264","https://openalex.org/W4229706427","https://openalex.org/W4250589301","https://openalex.org/W4250954493","https://openalex.org/W4253732449","https://openalex.org/W4293195488","https://openalex.org/W4301886962","https://openalex.org/W4308902993","https://openalex.org/W4361019560","https://openalex.org/W4382658113","https://openalex.org/W4387965994","https://openalex.org/W4388520166","https://openalex.org/W4388622959","https://openalex.org/W6674995601"],"related_works":["https://openalex.org/W4385832323","https://openalex.org/W4244391535","https://openalex.org/W2356996864","https://openalex.org/W2904060783","https://openalex.org/W2015393961","https://openalex.org/W2378339670","https://openalex.org/W2359353485","https://openalex.org/W2361427670","https://openalex.org/W2139910871","https://openalex.org/W2119925415"],"abstract_inverted_index":{"Policy":[0],"optimization":[1,30,57,71,102,137],"has":[2],"reemerged":[3],"as":[4],"an":[5],"important":[6],"approach":[7],"for":[8,21,59],"reinforcement":[9],"learning":[10],"and":[11,42],"optimal":[12,78],"control":[13,79],"problems.":[14],"A":[15],"notable":[16],"drawback":[17],"is":[18,123],"that":[19],"even":[20],"linear":[22],"quadratic":[23],"problems,":[24],"the":[25,39,77,97,104],"execution":[26,105],"of":[27,69,86,100,106],"model-free":[28,83],"policy":[29,56,70,80,101,136],"methods":[31,72],"generally":[32],"relies":[33],"on":[34],"multiple":[35],"trajectories":[36,117],"to":[37,63,75],"estimate":[38],"cost":[40],"gradient":[41],"state":[43],"covariance":[44],"matrix":[45],"in":[46,81,118],"each":[47,119],"iteration.":[48,120],"This":[49],"paper":[50],"proposes":[51],"a":[52,82,110],"novel":[53],"single":[54,111],"trajectory-based":[55],"algorithm":[58,108,122,132],"stochastic":[60],"systems":[61],"subject":[62],"multiplicative":[64],"noises.":[65],"Specifically,":[66],"three":[67],"variants":[68],"are":[73,88],"proposed":[74],"learn":[76],"manner,":[84],"all":[85],"which":[87,129],"supported":[89],"with":[90,96],"provable":[91],"convergence":[92],"results.":[93],"In":[94],"contrast":[95],"existing":[98],"work":[99],"algorithms,":[103],"our":[107,131],"reuses":[109],"system":[112,116],"trajectory":[113],"without":[114],"regenerating":[115],"Our":[121],"evaluated":[124,135],"through":[125],"several":[126],"numerical":[127],"examples,":[128],"imply":[130],"outperforms":[133],"other":[134],"algorithms.":[138]},"counts_by_year":[],"updated_date":"2026-06-15T08:34:33.830935","created_date":"2025-10-10T00:00:00"}
