{"id":"https://openalex.org/W3146773041","doi":"https://doi.org/10.1080/00207179.2021.1913516","title":"Bellman's principle of optimality and deep reinforcement learning for time-varying tasks","display_name":"Bellman's principle of optimality and deep reinforcement learning for time-varying tasks","publication_year":2021,"publication_date":"2021-04-06","ids":{"openalex":"https://openalex.org/W3146773041","doi":"https://doi.org/10.1080/00207179.2021.1913516","mag":"3146773041"},"language":"en","primary_location":{"id":"doi:10.1080/00207179.2021.1913516","is_oa":false,"landing_page_url":"https://doi.org/10.1080/00207179.2021.1913516","pdf_url":null,"source":{"id":"https://openalex.org/S88061139","display_name":"International Journal of Control","issn_l":"0020-7179","issn":["0020-7179","1366-5820"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/11573/1545947","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009770666","display_name":"Alessandro Giuseppi","orcid":"https://orcid.org/0000-0001-5503-8506"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alessandro Giuseppi","raw_affiliation_strings":["Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy","Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \"La Sapeinza\", Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy","institution_ids":["https://openalex.org/I861853513"]},{"raw_affiliation_string":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \"La Sapeinza\", Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088978718","display_name":"Antonio Pietrabissa","orcid":"https://orcid.org/0000-0003-0188-3346"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Antonio Pietrabissa","raw_affiliation_strings":["Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy","Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \"La Sapeinza\", Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \u201cLa Sapeinza\u201d, Rome, Italy","institution_ids":["https://openalex.org/I861853513"]},{"raw_affiliation_string":"Department of Computer, Control, and Management Engineering, Antonio Ruberti at the University of Rome \"La Sapeinza\", Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5009770666"],"corresponding_institution_ids":["https://openalex.org/I861853513"],"apc_list":null,"apc_paid":null,"fwci":0.5439,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.71427951,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"95","issue":"9","first_page":"2448","last_page":"2459"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.891514241695404},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.7316001057624817},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6779760122299194},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6426854133605957},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5764527916908264},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5449799299240112},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.519594669342041},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4885929226875305},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4627862572669983},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.45974451303482056},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.44128406047821045},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4358004629611969},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41348084807395935},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24593257904052734},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2320099174976349}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.891514241695404},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.7316001057624817},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6779760122299194},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6426854133605957},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5764527916908264},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5449799299240112},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.519594669342041},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4885929226875305},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4627862572669983},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.45974451303482056},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.44128406047821045},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4358004629611969},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41348084807395935},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24593257904052734},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2320099174976349},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1080/00207179.2021.1913516","is_oa":false,"landing_page_url":"https://doi.org/10.1080/00207179.2021.1913516","pdf_url":null,"source":{"id":"https://openalex.org/S88061139","display_name":"International Journal of Control","issn_l":"0020-7179","issn":["0020-7179","1366-5820"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Control","raw_type":"journal-article"},{"id":"pmh:oai:iris.uniroma1.it:11573/1545947","is_oa":true,"landing_page_url":"https://hdl.handle.net/11573/1545947","pdf_url":null,"source":{"id":"https://openalex.org/S4377196107","display_name":"IRIS Research product catalog (Sapienza University of Rome)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:iris.uniroma1.it:11573/1545947","is_oa":true,"landing_page_url":"https://hdl.handle.net/11573/1545947","pdf_url":null,"source":{"id":"https://openalex.org/S4377196107","display_name":"IRIS Research product catalog (Sapienza University of Rome)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1587845729","https://openalex.org/W1757796397","https://openalex.org/W1987725948","https://openalex.org/W2046376809","https://openalex.org/W2052305027","https://openalex.org/W2091565802","https://openalex.org/W2096215872","https://openalex.org/W2098432798","https://openalex.org/W2100276679","https://openalex.org/W2107726111","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2145483940","https://openalex.org/W2145983895","https://openalex.org/W2155968351","https://openalex.org/W2156371714","https://openalex.org/W2168945108","https://openalex.org/W2169743339","https://openalex.org/W2173248099","https://openalex.org/W2186820913","https://openalex.org/W2294805292","https://openalex.org/W2334782222","https://openalex.org/W2600133099","https://openalex.org/W2604873668","https://openalex.org/W2735995851","https://openalex.org/W2746553466","https://openalex.org/W2781726626","https://openalex.org/W2809097245","https://openalex.org/W2943927637","https://openalex.org/W2962787188","https://openalex.org/W2964200634","https://openalex.org/W2990189926","https://openalex.org/W2996965557","https://openalex.org/W3012026310","https://openalex.org/W3037207827","https://openalex.org/W3100201759","https://openalex.org/W3101221300","https://openalex.org/W4214717370","https://openalex.org/W4237591687","https://openalex.org/W6608012839","https://openalex.org/W6637967152","https://openalex.org/W6684839497","https://openalex.org/W6740879895","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2280099822","https://openalex.org/W3196472998","https://openalex.org/W3103643887","https://openalex.org/W2937181779","https://openalex.org/W1967821692","https://openalex.org/W3146773041","https://openalex.org/W4287324889","https://openalex.org/W3133324912","https://openalex.org/W2500481045","https://openalex.org/W3099401519"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,8,29,38,42,50,65,70,75,85,105,108,113,130],"first":[4],"framework":[5],"(up":[6],"to":[7,11,55,83,88,101,123],"authors'":[9],"knowledge)":[10],"address":[12,69],"time-varying":[13,125],"objectives":[14],"in":[15,58,132],"finite-horizon":[16],"Deep":[17],"Reinforcement":[18],"Learning":[19],"(DeepRL),":[20],"based":[21],"on":[22,28,46],"a":[23,81,133],"switching":[24],"control":[25],"solution":[26],"developed":[27],"ground":[30],"of":[31,34,41],"Bellman's":[32],"principle":[33],"optimality.":[35],"By":[36],"augmenting":[37],"state":[39,76],"space":[40,77],"system":[43],"with":[44,104],"information":[45],"its":[47,60],"visit":[48],"time,":[49],"DeepRL":[51,98,109],"agent":[52],"is":[53],"able":[54,122],"solve":[56,124],"problems":[57,72],"which":[59],"task":[61],"dynamically":[62],"changes":[63],"within":[64],"same":[66],"episode.":[67],"To":[68],"scalability":[71],"caused":[73],"by":[74,96],"augmentation,":[78],"we":[79],"propose":[80],"procedure":[82],"partition":[84],"episode":[86],"length":[87],"define":[89],"separate":[90],"sub-problems":[91],"that":[92],"are":[93,120],"then":[94],"solved":[95],"specialised":[97],"agents.":[99],"Contrary":[100],"standard":[102],"solutions,":[103],"proposed":[106],"approach":[107,131],"agents":[110],"correctly":[111],"estimate":[112],"value":[114],"function":[115],"at":[116],"each":[117],"time-step":[118],"and":[119],"hence":[121],"tasks.":[126],"Numerical":[127],"simulations":[128],"validate":[129],"classic":[134],"RL":[135],"environment.":[136]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
