{"id":"https://openalex.org/W1978316810","doi":"https://doi.org/10.1109/adprl.2014.7010649","title":"On-policy Q-learning for adaptive optimal control","display_name":"On-policy Q-learning for adaptive optimal control","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W1978316810","doi":"https://doi.org/10.1109/adprl.2014.7010649","mag":"1978316810"},"language":"en","primary_location":{"id":"doi:10.1109/adprl.2014.7010649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2014.7010649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072303405","display_name":"S. K. Jha","orcid":"https://orcid.org/0000-0003-4853-6085"},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Sumit Kumar Jha","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Delhi, New Delhi, India","Department of Electrical Engg., Indian Institute of Technology Delhi, New Delhi - 110016, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Delhi, New Delhi, India","institution_ids":["https://openalex.org/I68891433"]},{"raw_affiliation_string":"Department of Electrical Engg., Indian Institute of Technology Delhi, New Delhi - 110016, India","institution_ids":["https://openalex.org/I68891433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034219830","display_name":"Shubhendu Bhasin","orcid":"https://orcid.org/0000-0002-8002-9684"},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shubhendu Bhasin","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Delhi, New Delhi, India","Department of Electrical Engg., Indian Institute of Technology Delhi, New Delhi - 110016, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Delhi, New Delhi, India","institution_ids":["https://openalex.org/I68891433"]},{"raw_affiliation_string":"Department of Electrical Engg., Indian Institute of Technology Delhi, New Delhi - 110016, India","institution_ids":["https://openalex.org/I68891433"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5072303405"],"corresponding_institution_ids":["https://openalex.org/I68891433"],"apc_list":null,"apc_paid":null,"fwci":1.0683,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.76119816,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12277","display_name":"Frequency Control in Power Systems","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10040","display_name":"Adaptive Control of Nonlinear Systems","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.682554304599762},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5903677940368652},{"id":"https://openalex.org/keywords/lti-system-theory","display_name":"LTI system theory","score":0.564623236656189},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5435947179794312},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.523815929889679},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.5089247226715088},{"id":"https://openalex.org/keywords/adaptive-control","display_name":"Adaptive control","score":0.4920780062675476},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4570557773113251},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.43774110078811646},{"id":"https://openalex.org/keywords/system-dynamics","display_name":"System dynamics","score":0.4360429346561432},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.42525744438171387},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.38611289858818054},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3616557717323303},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.3511487543582916},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.2724224925041199},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15462133288383484}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.682554304599762},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5903677940368652},{"id":"https://openalex.org/C87698059","wikidata":"https://www.wikidata.org/wiki/Q1808960","display_name":"LTI system theory","level":3,"score":0.564623236656189},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5435947179794312},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.523815929889679},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.5089247226715088},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.4920780062675476},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4570557773113251},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.43774110078811646},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.4360429346561432},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.42525744438171387},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.38611289858818054},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3616557717323303},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3511487543582916},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.2724224925041199},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15462133288383484},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/adprl.2014.7010649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2014.7010649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1487127700","https://openalex.org/W1515851193","https://openalex.org/W1545148916","https://openalex.org/W1616818660","https://openalex.org/W1983523797","https://openalex.org/W2005437559","https://openalex.org/W2013895638","https://openalex.org/W2024303516","https://openalex.org/W2087063454","https://openalex.org/W2090167557","https://openalex.org/W2104843094","https://openalex.org/W2148439597","https://openalex.org/W2160698719","https://openalex.org/W3011120880","https://openalex.org/W3139377883","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W1892002403","https://openalex.org/W4309005041","https://openalex.org/W3151522584","https://openalex.org/W2962746246","https://openalex.org/W1989531755","https://openalex.org/W1497123311","https://openalex.org/W1967384683","https://openalex.org/W2892237068","https://openalex.org/W2090386787","https://openalex.org/W2076533818"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3],"novel":[4,76],"on-policy":[5],"Q-learning":[6],"approach":[7],"for":[8,15],"finding":[9],"the":[10,30,34,40,45,53,56,68,87,103],"optimal":[11,35],"control":[12,36],"policy":[13,37],"online":[14,63],"continuous-time":[16],"linear":[17],"time":[18],"invariant":[19],"(LTI)":[20],"systems":[21],"with":[22,67],"completely":[23],"unknown":[24,31],"dynamics.":[25,96],"The":[26,47],"proposed":[27,88],"result":[28,89],"estimates":[29],"parameters":[32,66],"of":[33,55,65,70,72,92,94],"based":[38,51],"on":[39,52],"fixed":[41],"point":[42],"equation":[43],"involving":[44],"Q-function.":[46],"gradient-based":[48],"update":[49],"laws,":[50],"minimization":[54],"Bellman's":[57],"error,":[58],"are":[59,99],"used":[60],"to":[61,84,101],"achieve":[62],"adaptation":[64],"use":[69],"persistence":[71],"excitation":[73],"condition.":[74],"A":[75],"asymptotically":[77],"convergent":[78],"state":[79],"derivative":[80],"estimator":[81],"is":[82,90],"presented":[83,100],"ensure":[85],"that":[86],"independent":[91],"knowledge":[93],"system":[95],"Simulation":[97],"results":[98],"validate":[102],"theoretical":[104],"development.":[105]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
