{"id":"https://openalex.org/W4288055195","doi":"https://doi.org/10.48550/arxiv.2207.12141","title":"Live in the Moment: Learning Dynamics Model Adapted to Evolving Policy","display_name":"Live in the Moment: Learning Dynamics Model Adapted to Evolving Policy","publication_year":2022,"publication_date":"2022-07-25","ids":{"openalex":"https://openalex.org/W4288055195","doi":"https://doi.org/10.48550/arxiv.2207.12141"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2207.12141","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.12141","pdf_url":"https://arxiv.org/pdf/2207.12141","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2207.12141","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114585283","display_name":"Xiyao Wang","orcid":"https://orcid.org/0009-0004-9006-0281"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Xiyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022607158","display_name":"Wichayaporn Wongkamjan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wongkamjan, Wichayaporn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091135797","display_name":"Furong Huang","orcid":"https://orcid.org/0000-0001-8760-439X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Furong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5114585283"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9434000253677368,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9434000253677368,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7979977130889893},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6848212480545044},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.6235584020614624},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5668279528617859},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5564951300621033},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.5528895854949951},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5110138654708862},{"id":"https://openalex.org/keywords/system-dynamics","display_name":"System dynamics","score":0.5034715533256531},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.47414347529411316},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47045934200286865},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3993988633155823},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22312170267105103},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17113801836967468},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1101728081703186}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7979977130889893},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6848212480545044},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.6235584020614624},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5668279528617859},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5564951300621033},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.5528895854949951},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5110138654708862},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.5034715533256531},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.47414347529411316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47045934200286865},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3993988633155823},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22312170267105103},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17113801836967468},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1101728081703186},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2207.12141","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.12141","pdf_url":"https://arxiv.org/pdf/2207.12141","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2207.12141","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2207.12141","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2207.12141","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.12141","pdf_url":"https://arxiv.org/pdf/2207.12141","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6800000071525574,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4211139642","https://openalex.org/W2181721856","https://openalex.org/W2381418473","https://openalex.org/W2025879371","https://openalex.org/W2032986081","https://openalex.org/W1502305485","https://openalex.org/W826886275","https://openalex.org/W326831310","https://openalex.org/W4211080513","https://openalex.org/W2227085772"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1],"learning":[2,15,110,121],"(RL)":[3],"often":[4],"achieves":[5,167],"higher":[6,174],"sample":[7,45,171],"efficiency":[8,172],"in":[9,49,79,162,170],"practice":[10],"than":[11],"model-free":[12],"RL":[13,182],"by":[14],"a":[16,28,117,156],"dynamics":[17,29,57,119],"model":[18,30,58,70,109,112,120,141],"to":[19,137,145],"generate":[20],"samples":[21],"for":[22,39,62,72],"policy":[23,78,88,134],"learning.":[24],"Previous":[25],"works":[26],"learn":[27],"that":[31,54,165],"fits":[32],"under":[33,59],"the":[34,44,56,60,73,77,108,132,139,146,151,179],"empirical":[35],"state-action":[36,93,147],"visitation":[37,94,148],"distribution":[38,61,95,102,136,149],"all":[40],"historical":[41,64,105,133],"policies,":[42],"i.e.,":[43],"replay":[46],"buffer.":[47],"However,":[48],"this":[50,101],"paper,":[51],"we":[52],"observe":[53],"fitting":[55],"\\emph{all":[63],"policies}":[65],"does":[66],"not":[67],"necessarily":[68],"benefit":[69],"prediction":[71],"\\emph{current":[74],"policy}":[75],"since":[76],"use":[80],"is":[81],"constantly":[82],"evolving":[83,87,152],"over":[84,104],"time.":[85],"The":[86],"during":[89],"training":[90],"will":[91],"cause":[92],"shifts.":[96],"We":[97,114],"theoretically":[98],"analyze":[99],"how":[100],"shift":[103],"policies":[106],"affects":[107],"and":[111,173],"rollouts.":[113],"then":[115],"propose":[116],"novel":[118],"method,":[122],"named":[123],"\\textit{Policy-adapted":[124],"Dynamics":[125],"Model":[126],"Learning":[127],"(PDML)}.":[128],"PDML":[129,166],"dynamically":[130],"adjusts":[131],"mixture":[135],"ensure":[138],"learned":[140],"can":[142],"continually":[143],"adapt":[144],"of":[150,158],"policy.":[153],"Experiments":[154],"on":[155],"range":[157],"continuous":[159],"control":[160],"environments":[161],"MuJoCo":[163],"show":[164],"significant":[168],"improvement":[169],"asymptotic":[175],"performance":[176],"combined":[177],"with":[178],"state-of-the-art":[180],"model-based":[181],"methods.":[183]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
