{"id":"https://openalex.org/W4205689302","doi":"https://doi.org/10.23919/ecc54610.2021.9655004","title":"Model-free Control Design Using Policy Gradient Reinforcement Learning in LPV Framework","display_name":"Model-free Control Design Using Policy Gradient Reinforcement Learning in LPV Framework","publication_year":2021,"publication_date":"2021-06-29","ids":{"openalex":"https://openalex.org/W4205689302","doi":"https://doi.org/10.23919/ecc54610.2021.9655004"},"language":"en","primary_location":{"id":"doi:10.23919/ecc54610.2021.9655004","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ecc54610.2021.9655004","pdf_url":null,"source":{"id":"https://openalex.org/S4363608272","display_name":"2021 European Control Conference (ECC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 European Control Conference (ECC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058947568","display_name":"Yajie Bao","orcid":"https://orcid.org/0000-0001-8773-926X"},"institutions":[{"id":"https://openalex.org/I165733156","display_name":"University of Georgia","ror":"https://ror.org/00te3t702","country_code":"US","type":"education","lineage":["https://openalex.org/I165733156"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yajie Bao","raw_affiliation_strings":["School of Electrical & Computer Engineering, University of Georgia, Athens, GA"],"affiliations":[{"raw_affiliation_string":"School of Electrical & Computer Engineering, University of Georgia, Athens, GA","institution_ids":["https://openalex.org/I165733156"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086596463","display_name":"Javad Mohammadpour Velni","orcid":"https://orcid.org/0000-0001-8546-221X"},"institutions":[{"id":"https://openalex.org/I165733156","display_name":"University of Georgia","ror":"https://ror.org/00te3t702","country_code":"US","type":"education","lineage":["https://openalex.org/I165733156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Javad Mohammadpour Velni","raw_affiliation_strings":["School of Electrical & Computer Engineering, University of Georgia, Athens, GA"],"affiliations":[{"raw_affiliation_string":"School of Electrical & Computer Engineering, University of Georgia, Athens, GA","institution_ids":["https://openalex.org/I165733156"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5058947568"],"corresponding_institution_ids":["https://openalex.org/I165733156"],"apc_list":null,"apc_paid":null,"fwci":0.8727,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.67552035,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"150","last_page":"155"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11749","display_name":"Iterative Learning Control Systems","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11749","display_name":"Iterative Learning Control Systems","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14225","display_name":"Advanced Sensor and Control Systems","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11372","display_name":"Hydraulic and Pneumatic Systems","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8887271881103516},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6309576630592346},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5484403967857361},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4345189332962036},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.34027963876724243},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.334097683429718},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.32041335105895996},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2030746340751648}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8887271881103516},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6309576630592346},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5484403967857361},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4345189332962036},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.34027963876724243},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.334097683429718},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.32041335105895996},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2030746340751648},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/ecc54610.2021.9655004","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ecc54610.2021.9655004","pdf_url":null,"source":{"id":"https://openalex.org/S4363608272","display_name":"2021 European Control Conference (ECC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 European Control Conference (ECC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.800000011920929,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1989317569","https://openalex.org/W2017957151","https://openalex.org/W2026903670","https://openalex.org/W2170990015","https://openalex.org/W2245176828","https://openalex.org/W2768148423","https://openalex.org/W2780439059","https://openalex.org/W2788388592","https://openalex.org/W2910717362","https://openalex.org/W2923554444","https://openalex.org/W2963864421","https://openalex.org/W2970677506","https://openalex.org/W2981389409","https://openalex.org/W3027406032","https://openalex.org/W3045882693","https://openalex.org/W3125192641","https://openalex.org/W3154840238","https://openalex.org/W4214717370","https://openalex.org/W4288319859","https://openalex.org/W4302570325","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6757790240","https://openalex.org/W6764053384","https://openalex.org/W6778390183"],"related_works":["https://openalex.org/W8539471","https://openalex.org/W5435649","https://openalex.org/W8137082","https://openalex.org/W4651166","https://openalex.org/W11162148","https://openalex.org/W3471107","https://openalex.org/W8637261","https://openalex.org/W5779190","https://openalex.org/W3551423","https://openalex.org/W3289701"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"an":[3],"off-policy":[4,30],"policy":[5],"gradient":[6],"reinforcement":[7],"learning":[8,62],"(RL)":[9],"approach":[10,162],"to":[11,60,75,106,117],"control":[12,56,71,128,150,167],"nonlinear":[13],"systems":[14],"in":[15,89,136,168,173],"linear":[16],"parameter-varying":[17],"(LPV)":[18],"framework.":[19],"The":[20,87],"(parameter-varying)":[21],"controller":[22],"is":[23,58,73,110,115,134],"learned":[24,120],"from":[25],"the":[26,41,65,69,77,80,90,119,123,127,138,157,160,169],"closed-loop":[27],"trajectories":[28],"using":[29,144],"actor-critic":[31],"methods":[32],"for":[33,163],"RL,":[34],"instead":[35],"of":[36,79,92,126,140,159,171],"being":[37],"designed":[38],"based":[39],"on":[40],"system":[42],"model.":[43],"To":[44],"decrease":[45],"constraint":[46],"violation":[47],"(and":[48],"hence":[49],"improve":[50],"safety),":[51],"exploration":[52],"around":[53],"a":[54,149],"valid":[55,70],"sequence":[57,72],"proposed":[59,161],"facilitate":[61],"before":[63],"applying":[64],"real":[66],"policy.":[67],"Additionally,":[68],"used":[74,116],"determine":[76],"complexity":[78],"actor":[81,141],"and":[82,113,148,165],"critic":[83],"neural":[84],"networks":[85],"(NN).":[86],"uncertainties":[88],"evolution":[91],"LPV":[93,174],"scheduling":[94,103,108,175],"variables":[95],"are":[96],"tackled":[97],"by":[98],"generating":[99],"episodes":[100],"with":[101],"varying":[102],"trajectories.":[104],"Adapting":[105],"unseen":[107],"signals":[109],"empirically":[111],"investigated":[112],"fine-tuning":[114],"refine":[118],"controller.":[121],"Furthermore,":[122],"prior":[124],"knowledge":[125],"law":[129],"(e.g.,":[130],"static":[131],"state":[132],"feedback)":[133],"embedded":[135],"designing":[137],"structure":[139],"NN.":[142],"Experiments":[143],"two":[145],"numerical":[146],"examples":[147],"moment":[151],"gyroscope":[152],"(CMG)":[153],"simulation":[154],"model":[155],"illustrate":[156],"success":[158],"stabilization":[164],"tracking":[166],"presence":[170],"uncertainty":[172],"variables.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
