{"id":"https://openalex.org/W4387914516","doi":"https://doi.org/10.1109/codit58514.2023.10284089","title":"Model-Based Adaptation for Sample Efficient Transfer in Reinforcement Learning Control of Parameter-Varying Systems","display_name":"Model-Based Adaptation for Sample Efficient Transfer in Reinforcement Learning Control of Parameter-Varying Systems","publication_year":2023,"publication_date":"2023-07-03","ids":{"openalex":"https://openalex.org/W4387914516","doi":"https://doi.org/10.1109/codit58514.2023.10284089"},"language":"en","primary_location":{"id":"doi:10.1109/codit58514.2023.10284089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/codit58514.2023.10284089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 9th International Conference on Control, Decision and Information Technologies (CoDIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104081888","display_name":"Ibrahim Ahmed","orcid":"https://orcid.org/0000-0001-7814-5152"},"institutions":[{"id":"https://openalex.org/I4210160740","display_name":"Integrated Software (United States)","ror":"https://ror.org/05dp7m259","country_code":"US","type":"company","lineage":["https://openalex.org/I4210160740"]},{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ibrahim Ahmed","raw_affiliation_strings":["Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]},{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035863735","display_name":"Marcos Qui\u00f1ones-Grueiro","orcid":"https://orcid.org/0000-0001-5391-6774"},"institutions":[{"id":"https://openalex.org/I4210160740","display_name":"Integrated Software (United States)","ror":"https://ror.org/05dp7m259","country_code":"US","type":"company","lineage":["https://openalex.org/I4210160740"]},{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcos Quinones-Grueiro","raw_affiliation_strings":["Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]},{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051150754","display_name":"Gautam Biswas","orcid":"https://orcid.org/0000-0002-2752-3878"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]},{"id":"https://openalex.org/I4210160740","display_name":"Integrated Software (United States)","ror":"https://ror.org/05dp7m259","country_code":"US","type":"company","lineage":["https://openalex.org/I4210160740"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gautam Biswas","raw_affiliation_strings":["Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University,Nashville,TN,USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]},{"raw_affiliation_string":"Institute for Software Integrated Systems, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I4210160740","https://openalex.org/I200719446"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5104081888"],"corresponding_institution_ids":["https://openalex.org/I200719446","https://openalex.org/I4210160740"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57028321,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"706","last_page":"711"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.958299994468689,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8686822056770325},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7524414658546448},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6727975010871887},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6393077969551086},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5734019875526428},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5635640025138855},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5294216275215149},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5179263949394226},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4996638298034668},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4189518094062805}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8686822056770325},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7524414658546448},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6727975010871887},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6393077969551086},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5734019875526428},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5635640025138855},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5294216275215149},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5179263949394226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4996638298034668},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4189518094062805},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/codit58514.2023.10284089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/codit58514.2023.10284089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 9th International Conference on Control, Decision and Information Technologies (CoDIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4350348094","display_name":null,"funder_award_id":"80NSSC21M0087-21-S06","funder_id":"https://openalex.org/F4320306101","funder_display_name":"National Aeronautics and Space Administration"}],"funders":[{"id":"https://openalex.org/F4320306101","display_name":"National Aeronautics and Space Administration","ror":"https://ror.org/027ka1x80"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W34573732","https://openalex.org/W122178443","https://openalex.org/W1603035390","https://openalex.org/W2070267857","https://openalex.org/W2113953866","https://openalex.org/W2604763608","https://openalex.org/W2728818360","https://openalex.org/W2766560519","https://openalex.org/W2769440362","https://openalex.org/W2924907019","https://openalex.org/W2969856879","https://openalex.org/W2970531905","https://openalex.org/W2971729523","https://openalex.org/W2980113592","https://openalex.org/W2991394772","https://openalex.org/W3034942609","https://openalex.org/W3036006021","https://openalex.org/W3100625157","https://openalex.org/W3141797743","https://openalex.org/W3163842339","https://openalex.org/W3184646938","https://openalex.org/W3209651137","https://openalex.org/W4237591687","https://openalex.org/W4289694518","https://openalex.org/W4294690650","https://openalex.org/W6605098596","https://openalex.org/W6676875888","https://openalex.org/W6736057607","https://openalex.org/W6742058293","https://openalex.org/W6753758835","https://openalex.org/W6767629743","https://openalex.org/W6771304822","https://openalex.org/W6798414287"],"related_works":["https://openalex.org/W3176564347","https://openalex.org/W3031039437","https://openalex.org/W3204184292","https://openalex.org/W1985458517","https://openalex.org/W2355833770","https://openalex.org/W3101398262","https://openalex.org/W3095877357","https://openalex.org/W4386821099","https://openalex.org/W4386121542","https://openalex.org/W2768698792"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,102,197],"leverage":[4],"ideas":[5],"from":[6,82,111],"model-based":[7,105],"control":[8,59,75,113,178],"to":[9,41,49,117,139,141,174],"address":[10],"the":[11,31,46,58,71,83,118,135,147,187,201],"sample":[12],"efficiency":[13],"problem":[14],"of":[15,26,33,45,57,73,95,149],"reinforcement":[16,136,167],"learning":[17,21,38,137,168],"(RL)":[18],"algorithms.":[19],"Accelerating":[20],"is":[22,66,124,161,184,194],"an":[23,132,180,191],"active":[24],"field":[25],"RL":[27],"highly":[28],"relevant":[29],"in":[30,98,186],"context":[32],"time-varying":[34],"systems.":[35],"Traditional":[36],"transfer":[37,97,123,207],"methods":[39],"propose":[40,103],"use":[42],"prior":[43],"knowledge":[44],"system":[47],"behavior":[48],"devise":[50],"a":[51,104,112,121,142],"gradual":[52],"or":[53],"immediate":[54],"data-driven":[55],"transformation":[56,65,127],"policy":[60,114],"obtained":[61],"through":[62,152],"RL.":[63],"Such":[64],"usually":[67],"computed":[68],"by":[69],"estimating":[70],"performance":[72,148,173],"previous":[74],"policies":[76],"based":[77],"on":[78],"measurements":[79],"recently":[80],"collected":[81],"system.":[84],"However,":[85],"such":[86,107],"retrospective":[87],"measures":[88],"have":[89],"debatable":[90],"utility":[91],"with":[92,166,208],"no":[93],"guarantees":[94,205],"positive":[96,122,206],"most":[99],"cases.":[100,189],"Instead,":[101],"transformation,":[106],"that":[108,158,200],"when":[109,179],"actions":[110],"are":[115],"applied":[116],"target":[119],"system,":[120],"achieved.":[125],"The":[126],"can":[128],"be":[129],"used":[130],"as":[131],"initialization":[133],"for":[134],"process":[138],"converge":[140],"new":[143],"optimum.":[144],"We":[145,156],"validate":[146],"our":[150,159],"approach":[151,160,203],"four":[153],"benchmark":[154],"examples.":[155],"demonstrate":[157],"more":[162],"sample-efficient":[163],"than":[164],"fine-tuning":[165],"alone":[169],"and":[170,176],"achieves":[171],"comparable":[172],"linear-quadratic-regulators":[175],"model-predictive":[177],"accurate":[181,192],"linear":[182],"model":[183,193],"known":[185],"three":[188],"If":[190],"not":[195],"known,":[196],"empirically":[198],"show":[199],"proposed":[202],"still":[204],"jump-start":[209],"improvement.":[210]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
