{"id":"https://openalex.org/W1612466322","doi":"https://doi.org/10.1109/iccis.2015.7274545","title":"H&lt;inf&gt;&amp;#x221E;&lt;/inf&gt; optimal control of unknown linear discrete-time systems: An off-policy reinforcement learning approach","display_name":"H&lt;inf&gt;&amp;#x221E;&lt;/inf&gt; optimal control of unknown linear discrete-time systems: An off-policy reinforcement learning approach","publication_year":2015,"publication_date":"2015-07-01","ids":{"openalex":"https://openalex.org/W1612466322","doi":"https://doi.org/10.1109/iccis.2015.7274545","mag":"1612466322"},"language":"en","primary_location":{"id":"doi:10.1109/iccis.2015.7274545","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccis.2015.7274545","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 7th International Conference on Cybernetics and Intelligent Systems (CIS) and IEEE Conference on Robotics, Automation and Mechatronics (RAM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089224471","display_name":"Bahare Kiumarsi","orcid":"https://orcid.org/0000-0002-9701-8375"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bahare Kiumarsi","raw_affiliation_strings":["UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA"],"affiliations":[{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","institution_ids":["https://openalex.org/I189196454"]},{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063118155","display_name":"Hamidreza Modares","orcid":"https://orcid.org/0000-0003-0800-5140"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hamidreza Modares","raw_affiliation_strings":["UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA"],"affiliations":[{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","institution_ids":["https://openalex.org/I189196454"]},{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016137188","display_name":"Frank L. Lewis","orcid":"https://orcid.org/0000-0003-4074-1615"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank L. Lewis","raw_affiliation_strings":["UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA"],"affiliations":[{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX, USA","institution_ids":["https://openalex.org/I189196454"]},{"raw_affiliation_string":"UTA Research Institute UTARI, The University of Texas at Arlington, Ft. Worth, TX 76118, USA","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067046312","display_name":"Zhong\u2010Ping Jiang","orcid":"https://orcid.org/0000-0002-4868-9359"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhong-Ping Jiang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, New York University, Brooklyn, NY, USA","Control and Networks Lab, Department of Electrical and Computer Engineering, Polytechnic School of Engineering, New York University, Brooklyn, NY 11201, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, New York University, Brooklyn, NY, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Control and Networks Lab, Department of Electrical and Computer Engineering, Polytechnic School of Engineering, New York University, Brooklyn, NY 11201, USA#TAB#","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089224471"],"corresponding_institution_ids":["https://openalex.org/I189196454"],"apc_list":null,"apc_paid":null,"fwci":1.4021,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.81636352,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"45","issue":null,"first_page":"41","last_page":"46"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10675","display_name":"Mechanical Circulatory Support Devices","score":0.9605000019073486,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8124281167984009},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5772479772567749},{"id":"https://openalex.org/keywords/discrete-time-and-continuous-time","display_name":"Discrete time and continuous time","score":0.5508744716644287},{"id":"https://openalex.org/keywords/algebraic-riccati-equation","display_name":"Algebraic Riccati equation","score":0.538711667060852},{"id":"https://openalex.org/keywords/algebraic-number","display_name":"Algebraic number","score":0.49543941020965576},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4923640489578247},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.44784584641456604},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.44370636343955994},{"id":"https://openalex.org/keywords/riccati-equation","display_name":"Riccati equation","score":0.3712828755378723},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3680294156074524},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.36647748947143555},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.3425384759902954},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28684747219085693},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26396816968917847},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.12810760736465454},{"id":"https://openalex.org/keywords/differential-equation","display_name":"Differential equation","score":0.10229340195655823},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09646984934806824}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8124281167984009},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5772479772567749},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.5508744716644287},{"id":"https://openalex.org/C13847129","wikidata":"https://www.wikidata.org/wiki/Q4723989","display_name":"Algebraic Riccati equation","level":4,"score":0.538711667060852},{"id":"https://openalex.org/C9376300","wikidata":"https://www.wikidata.org/wiki/Q168817","display_name":"Algebraic number","level":2,"score":0.49543941020965576},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4923640489578247},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.44784584641456604},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.44370636343955994},{"id":"https://openalex.org/C45473103","wikidata":"https://www.wikidata.org/wiki/Q851503","display_name":"Riccati equation","level":3,"score":0.3712828755378723},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3680294156074524},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.36647748947143555},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.3425384759902954},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28684747219085693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26396816968917847},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.12810760736465454},{"id":"https://openalex.org/C78045399","wikidata":"https://www.wikidata.org/wiki/Q11214","display_name":"Differential equation","level":2,"score":0.10229340195655823},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09646984934806824},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccis.2015.7274545","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccis.2015.7274545","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE 7th International Conference on Cybernetics and Intelligent Systems (CIS) and IEEE Conference on Robotics, Automation and Mechatronics (RAM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1578630563","https://openalex.org/W1968315580","https://openalex.org/W2005437559","https://openalex.org/W2012451615","https://openalex.org/W2013895638","https://openalex.org/W2043903123","https://openalex.org/W2063358594","https://openalex.org/W2139416664","https://openalex.org/W2148439597","https://openalex.org/W2155979098","https://openalex.org/W2484646121","https://openalex.org/W2498694880","https://openalex.org/W2727450595","https://openalex.org/W3011120880","https://openalex.org/W3213472335","https://openalex.org/W4205326910","https://openalex.org/W4214717370","https://openalex.org/W6634494593","https://openalex.org/W6723937200","https://openalex.org/W6804409665"],"related_works":["https://openalex.org/W2005019312","https://openalex.org/W2105459831","https://openalex.org/W1546579375","https://openalex.org/W2127238600","https://openalex.org/W2096105566","https://openalex.org/W1563296412","https://openalex.org/W2114791549","https://openalex.org/W16457797","https://openalex.org/W2792258377","https://openalex.org/W1987193149"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,73,82,110,119,123],"model-free":[4,54],"H":[5,59],"<sub":[6,60],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[7,61],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">\u221e</sub>":[8,62],"control":[9,63,128,148],"design":[10],"for":[11,57],"linear":[12],"discrete-time":[13],"systems":[14],"using":[15,35],"reinforcement":[16],"learning":[17],"(RL).":[18],"A":[19,136],"novel":[20],"off-policy":[21],"RL":[22,45,55],"algorithm":[23,46],"is":[24,67,78,86,115,139],"used":[25,140],"to":[26,52,88,94,106,126,130,141],"solve":[27],"the":[28,36,40,48,127,143,146],"game":[29],"algebraic":[30],"Riccati":[31],"equation":[32],"(GARE)":[33],"online":[34],"measured":[37],"data":[38,68],"along":[39],"system":[41],"trajectories.":[42],"The":[43,100],"proposed":[44,147],"has":[47],"following":[49],"advantages":[50],"compared":[51],"existing":[53],"methods":[56],"solving":[58],"problem:":[64],"1)":[65],"It":[66],"efficient":[69],"and":[70],"fast":[71],"since":[72],"stream":[74],"of":[75,121,133,145],"experiences":[76],"which":[77],"obtained":[79],"from":[80],"executing":[81],"fixed":[83],"behavioral":[84],"policy":[85],"reused":[87],"update":[89],"many":[90],"value":[91],"functions":[92],"correspond":[93],"different":[95],"leaning":[96],"policies":[97],"sequentially.":[98],"2)":[99],"disturbance":[101],"input":[102,129],"does":[103],"not":[104],"need":[105],"be":[107],"adjusted":[108],"in":[109],"specific":[111],"manner.":[112],"3)":[113],"There":[114],"no":[116],"bias":[117],"as":[118],"result":[120],"adding":[122],"probing":[124],"noise":[125],"maintain":[131],"persistence":[132],"excitation":[134],"conditions.":[135],"simulation":[137],"example":[138],"verify":[142],"effectiveness":[144],"scheme.":[149]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
