{"id":"https://openalex.org/W4210250498","doi":"https://doi.org/10.1109/cdc45484.2021.9683350","title":"On the Search for Feedback in Reinforcement Learning","display_name":"On the Search for Feedback in Reinforcement Learning","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W4210250498","doi":"https://doi.org/10.1109/cdc45484.2021.9683350"},"language":"en","primary_location":{"id":"doi:10.1109/cdc45484.2021.9683350","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683350","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115595561","display_name":"Ran Wang","orcid":"https://orcid.org/0000-0002-3606-3527"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ran Wang","raw_affiliation_strings":["Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001383734","display_name":"Karthikeya S. Parunandi","orcid":"https://orcid.org/0000-0003-2733-3385"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karthikeya S. Parunandi","raw_affiliation_strings":["Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078202724","display_name":"Aayushman Sharma","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aayushman Sharma","raw_affiliation_strings":["Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072338368","display_name":"Raman Goyal","orcid":"https://orcid.org/0000-0002-8128-3051"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raman Goyal","raw_affiliation_strings":["Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061286389","display_name":"Suman Chakravorty","orcid":"https://orcid.org/0000-0003-3074-3406"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suman Chakravorty","raw_affiliation_strings":["Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115595561"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":0.8796,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78220228,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1560","last_page":"1567"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8632122278213501},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6529741287231445},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6059156060218811},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.6042803525924683},{"id":"https://openalex.org/keywords/parametrization","display_name":"Parametrization (atmospheric modeling)","score":0.6025629639625549},{"id":"https://openalex.org/keywords/feedback-loop","display_name":"Feedback loop","score":0.4720279276371002},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.46671149134635925},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4455919861793518},{"id":"https://openalex.org/keywords/open-loop-controller","display_name":"Open-loop controller","score":0.42794013023376465},{"id":"https://openalex.org/keywords/local-search","display_name":"Local search (optimization)","score":0.4171919524669647},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4150097668170929},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3912183344364166},{"id":"https://openalex.org/keywords/closed-loop","display_name":"Closed loop","score":0.35616254806518555},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3467264175415039},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2515537738800049},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24484318494796753},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.21094295382499695},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.11988386511802673},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.09612634778022766},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09265658259391785}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8632122278213501},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6529741287231445},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6059156060218811},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.6042803525924683},{"id":"https://openalex.org/C202887219","wikidata":"https://www.wikidata.org/wiki/Q3895221","display_name":"Parametrization (atmospheric modeling)","level":3,"score":0.6025629639625549},{"id":"https://openalex.org/C186886427","wikidata":"https://www.wikidata.org/wiki/Q5441213","display_name":"Feedback loop","level":2,"score":0.4720279276371002},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.46671149134635925},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4455919861793518},{"id":"https://openalex.org/C161362739","wikidata":"https://www.wikidata.org/wiki/Q2301555","display_name":"Open-loop controller","level":3,"score":0.42794013023376465},{"id":"https://openalex.org/C135320971","wikidata":"https://www.wikidata.org/wiki/Q1868524","display_name":"Local search (optimization)","level":2,"score":0.4171919524669647},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4150097668170929},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3912183344364166},{"id":"https://openalex.org/C3019251811","wikidata":"https://www.wikidata.org/wiki/Q5135346","display_name":"Closed loop","level":2,"score":0.35616254806518555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3467264175415039},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2515537738800049},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24484318494796753},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.21094295382499695},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.11988386511802673},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.09612634778022766},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09265658259391785},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C74902906","wikidata":"https://www.wikidata.org/wiki/Q1190858","display_name":"Radiative transfer","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc45484.2021.9683350","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683350","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W234738007","https://openalex.org/W1544193923","https://openalex.org/W1771410628","https://openalex.org/W1985235885","https://openalex.org/W2025752438","https://openalex.org/W2031225427","https://openalex.org/W2056257343","https://openalex.org/W2066557929","https://openalex.org/W2087617385","https://openalex.org/W2140135625","https://openalex.org/W2147032798","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2167856595","https://openalex.org/W2257979135","https://openalex.org/W2416041116","https://openalex.org/W2487144912","https://openalex.org/W2554984891","https://openalex.org/W2736601468","https://openalex.org/W2747402019","https://openalex.org/W2749928749","https://openalex.org/W2754517384","https://openalex.org/W2781585732","https://openalex.org/W2963864421","https://openalex.org/W2963987440","https://openalex.org/W3107813559","https://openalex.org/W4210250498","https://openalex.org/W4236323066","https://openalex.org/W4288366590","https://openalex.org/W6638018090","https://openalex.org/W6680657880","https://openalex.org/W6681631837","https://openalex.org/W6682849425","https://openalex.org/W6684827244","https://openalex.org/W6684921986","https://openalex.org/W6729507393","https://openalex.org/W6741002519","https://openalex.org/W6742945991","https://openalex.org/W6747387971","https://openalex.org/W6761461577","https://openalex.org/W6774521903","https://openalex.org/W6780559895","https://openalex.org/W6786055145"],"related_works":["https://openalex.org/W2103982267","https://openalex.org/W2065196227","https://openalex.org/W2319944002","https://openalex.org/W3039582398","https://openalex.org/W2558015143","https://openalex.org/W2724432464","https://openalex.org/W2357305699","https://openalex.org/W2126107835","https://openalex.org/W1484482787","https://openalex.org/W1552820068"],"abstract_inverted_index":{"The":[0],"problem":[1],"of":[2,26,63,108,136],"Reinforcement":[3],"Learning":[4],"(RL)":[5],"in":[6,86],"an":[7,18,64,68],"unknown":[8,28],"nonlinear":[9,39],"dynamical":[10,29],"system":[11],"is":[12,103,120],"equivalent":[13],"to":[14,105,123,132],"the":[15,23,27,77,90,99,109,124,137],"search":[16,34],"for":[17],"optimal":[19,70],"feedback":[20,40,60,72,139],"law":[21,73],"utilizing":[22],"simulations/":[24],"rollouts":[25],"system.":[30],"Most":[31],"RL":[32,111],"techniques":[33],"over":[35,57],"a":[36,58],"complex":[37],"global":[38,106,134],"parametrization":[41],"making":[42],"them":[43],"suffer":[44],"from":[45],"high":[46],"training":[47],"times":[48],"as":[49,51],"well":[50],"variance.":[52],"Instead,":[53],"we":[54,115],"advocate":[55],"searching":[56],"local":[59,128],"representation":[61],"consisting":[62],"open-loop":[65],"sequence,":[66],"and":[67,95,98,126],"associated":[69],"linear":[71],"completely":[74],"determined":[75],"by":[76],"open-loop.":[78],"We":[79],"show":[80],"that":[81],"this":[82],"alternate":[83],"approach":[84],"results":[85],"highly":[87],"efficient":[88],"training,":[89],"answers":[91],"obtained":[92],"are":[93],"repeatable":[94],"hence":[96],"reliable,":[97],"resulting":[100,138],"closed":[101],"performance":[102],"superior":[104],"state":[107],"art":[110],"techniques.":[112],"Finally,":[113],"if":[114],"replan,":[116],"whenever":[117],"required,":[118],"which":[119],"feasible":[121],"due":[122],"fast":[125],"reliable":[127],"solution,":[129],"allows":[130],"us":[131],"recover":[133],"optimality":[135],"law.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
