{"id":"https://openalex.org/W4392693740","doi":"https://doi.org/10.1109/tetci.2024.3369636","title":"Model-Based Off-Policy Deep Reinforcement Learning With Model-Embedding","display_name":"Model-Based Off-Policy Deep Reinforcement Learning With Model-Embedding","publication_year":2024,"publication_date":"2024-03-12","ids":{"openalex":"https://openalex.org/W4392693740","doi":"https://doi.org/10.1109/tetci.2024.3369636"},"language":"en","primary_location":{"id":"doi:10.1109/tetci.2024.3369636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3369636","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102869238","display_name":"Xiaoyu Tan","orcid":"https://orcid.org/0000-0003-3555-7143"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xiaoyu Tan","raw_affiliation_strings":["INF Technology Company, Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"INF Technology Company, Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015442079","display_name":"Chao Qu","orcid":"https://orcid.org/0009-0003-4246-9106"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chao Qu","raw_affiliation_strings":["INF Technology Company, Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"INF Technology Company, Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102937576","display_name":"Junwu Xiong","orcid":"https://orcid.org/0009-0008-2028-510X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junwu Xiong","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038286731","display_name":"James Y. Zhang","orcid":"https://orcid.org/0000-0001-6519-676X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"James Zhang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007950680","display_name":"Xihe Qiu","orcid":"https://orcid.org/0000-0003-4024-925X"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xihe Qiu","raw_affiliation_strings":["School of Electronic and Electrical Engineering, Shanghai University of Engineering Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Electrical Engineering, Shanghai University of Engineering Science, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032314861","display_name":"Yaochu Jin","orcid":"https://orcid.org/0000-0003-1100-0631"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaochu Jin","raw_affiliation_strings":["School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102869238"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7376,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85868776,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"8","issue":"4","first_page":"2974","last_page":"2986"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9031726121902466},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7329806089401245},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6319699287414551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6149911880493164},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4811233580112457},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.48059532046318054},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42315560579299927},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4111577272415161},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.0784635841846466}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9031726121902466},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7329806089401245},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6319699287414551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6149911880493164},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4811233580112457},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.48059532046318054},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42315560579299927},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4111577272415161},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0784635841846466},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetci.2024.3369636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3369636","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3593522634","display_name":null,"funder_award_id":"23ZR1425400","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"},{"id":"https://openalex.org/G4862170252","display_name":null,"funder_award_id":"62102241","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W1570233100","https://openalex.org/W1977671496","https://openalex.org/W2087617385","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2201581102","https://openalex.org/W2344786740","https://openalex.org/W2612281180","https://openalex.org/W2736750746","https://openalex.org/W2746553466","https://openalex.org/W2768956845","https://openalex.org/W2799151646","https://openalex.org/W2883364792","https://openalex.org/W2910568379","https://openalex.org/W2910913210","https://openalex.org/W2962872206","https://openalex.org/W2963864421","https://openalex.org/W2981037657","https://openalex.org/W3124201714","https://openalex.org/W3135239772","https://openalex.org/W3200466256","https://openalex.org/W3213333726","https://openalex.org/W4200574011","https://openalex.org/W4211194393","https://openalex.org/W4283311220","https://openalex.org/W4285762883","https://openalex.org/W4298857966","https://openalex.org/W4389363346","https://openalex.org/W6633971062","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6639949747","https://openalex.org/W6674884181","https://openalex.org/W6675999342","https://openalex.org/W6677939520","https://openalex.org/W6678367057","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6696380822","https://openalex.org/W6713603661","https://openalex.org/W6729448088","https://openalex.org/W6734517396","https://openalex.org/W6735913928","https://openalex.org/W6746581380","https://openalex.org/W6747473740","https://openalex.org/W6748519856","https://openalex.org/W6748839928","https://openalex.org/W6750186571","https://openalex.org/W6751087324","https://openalex.org/W6751494529","https://openalex.org/W6753183898","https://openalex.org/W6754471908","https://openalex.org/W6758960857","https://openalex.org/W6763990646","https://openalex.org/W6764053384","https://openalex.org/W6769218391","https://openalex.org/W6771217966","https://openalex.org/W6780559895","https://openalex.org/W6786873861","https://openalex.org/W6791283100","https://openalex.org/W6804326156","https://openalex.org/W6839280558","https://openalex.org/W6859152455"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2586732548"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1,13,86,98],"learning":[2,14,87],"(MBRL)":[3],"has":[4],"shown":[5],"its":[6],"advantages":[7],"in":[8,93,116,124],"sample":[9],"efficiency":[10],"over":[11],"model-free":[12],"(MFRL)":[15],"by":[16,31,49],"leveraging":[17],"control-based":[18],"domain":[19],"knowledge.":[20],"Despite":[21],"the":[22,35,41,51,59,65,94,104,122,125,133,146,151,156,159],"impressive":[23],"results":[24],"it":[25],"achieves,":[26],"MBRL":[27],"is":[28],"still":[29],"outperformed":[30],"MFRL":[32],"due":[33],"to":[34,71],"lack":[36],"of":[37,53,61,67,96,143,158],"unlimited":[38],"interactions":[39],"with":[40,89,145],"environment.":[42],"While":[43],"imaginary":[44,114,161],"data":[45,62,115,135],"can":[46,175],"be":[47,72],"generated":[48],"imagining":[50],"trajectories":[52],"future":[54],"states,":[55],"a":[56,79,90,140],"trade-off":[57],"between":[58],"usage":[60],"generation":[63],"and":[64,81,106,113,128,153,170],"influence":[66],"model":[68,91,107,123,152],"bias":[69],"remains":[70],"resolved.":[73],"In":[74,118],"this":[75],"paper,":[76],"we":[77,109,120,164],"propose":[78],"simple":[80],"elegant":[82],"off-policy":[83],"model-based":[84],"deep":[85],"algorithm":[88,174],"embedded":[92],"framework":[95],"probabilistic":[97],"learning,":[99],"called":[100],"MEMB.":[101],"To":[102],"balance":[103],"sample-efficiency":[105],"bias,":[108],"exploit":[110],"both":[111],"real":[112,134],"training.":[117],"particular,":[119],"embed":[121],"policy":[126],"update":[127],"learn":[129],"value":[130],"functions":[131],"from":[132],"set.":[136],"We":[137],"also":[138],"provide":[139],"theoretical":[141],"analysis":[142],"MEMB":[144,166],"Lipschitz":[147],"continuity":[148],"assumption":[149],"on":[150,167],"policy,":[154],"proving":[155],"reliability":[157],"short-term":[160],"rollout.":[162],"Finally,":[163],"evaluate":[165],"several":[168],"benchmarks":[169],"demonstrate":[171],"that":[172],"our":[173],"achieve":[176],"state-of-the-art":[177],"performance.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
