{"id":"https://openalex.org/W4413478205","doi":"https://doi.org/10.1109/tsmc.2025.3578666","title":"DOMAIN: Mildly Conservative Model-Based Offline Reinforcement Learning","display_name":"DOMAIN: Mildly Conservative Model-Based Offline Reinforcement Learning","publication_year":2025,"publication_date":"2025-07-08","ids":{"openalex":"https://openalex.org/W4413478205","doi":"https://doi.org/10.1109/tsmc.2025.3578666"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2025.3578666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3578666","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087606152","display_name":"Xiao-Yin Liu","orcid":"https://orcid.org/0000-0001-7407-2216"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao-Yin Liu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7407-2216","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110476418","display_name":"Xiao-Hu Zhou","orcid":"https://orcid.org/0000-0002-7602-4848"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao-Hu Zhou","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7602-4848","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035211277","display_name":"Mei-Jiang Gui","orcid":"https://orcid.org/0000-0001-9803-891X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mei-Jiang Gui","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000388617","display_name":"Guotao Li","orcid":"https://orcid.org/0000-0001-9201-2700"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo-Tao Li","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9201-2700","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038907015","display_name":"Xiao\u2010Liang Xie","orcid":"https://orcid.org/0000-0002-6227-4811"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao-Liang Xie","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6227-4811","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451053","display_name":"Shi-Qi Liu","orcid":"https://orcid.org/0000-0003-1790-8448"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shi-Qi Liu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1790-8448","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034269100","display_name":"Shuangyi Wang","orcid":"https://orcid.org/0000-0003-4316-3259"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuang-Yi Wang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4316-3259","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101736382","display_name":"Qichao Zhang","orcid":"https://orcid.org/0000-0003-2251-956X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi-Chao Zhang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012004938","display_name":"Biao Luo","orcid":"https://orcid.org/0000-0002-3353-2586"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Biao Luo","raw_affiliation_strings":["School of Automation, Central South University, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-3353-2586","affiliations":[{"raw_affiliation_string":"School of Automation, Central South University, Changsha, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109333020","display_name":"Zeng\u2010Guang Hou","orcid":"https://orcid.org/0000-0002-1534-5840"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeng-Guang Hou","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1534-5840","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7588,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88213458,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"55","issue":"10","first_page":"7142","last_page":"7155"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7271344065666199},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5332422256469727},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.512540340423584},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5106778740882874},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4099530279636383},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3361409902572632},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24698218703269958},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0894739031791687},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07006186246871948}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7271344065666199},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5332422256469727},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.512540340423584},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5106778740882874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4099530279636383},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3361409902572632},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24698218703269958},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0894739031791687},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07006186246871948},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2025.3578666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3578666","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1965628958","display_name":null,"funder_award_id":"62222316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3585780359","display_name":null,"funder_award_id":"2023YFC2415100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6392416268","display_name":null,"funder_award_id":"2024M763535","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G7524539446","display_name":null,"funder_award_id":"62303463","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7880899136","display_name":null,"funder_award_id":"82327801","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8712693307","display_name":null,"funder_award_id":"62373351","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8717948327","display_name":null,"funder_award_id":"62073325","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2746553466","https://openalex.org/W2904246096","https://openalex.org/W2908261578","https://openalex.org/W2968986602","https://openalex.org/W2999963940","https://openalex.org/W4214918501","https://openalex.org/W4281677141","https://openalex.org/W4294192828","https://openalex.org/W4360584316","https://openalex.org/W4386524114","https://openalex.org/W4386825405","https://openalex.org/W4386869676","https://openalex.org/W4388469888","https://openalex.org/W4390823897","https://openalex.org/W4391259941","https://openalex.org/W4392477589","https://openalex.org/W4396542660","https://openalex.org/W4399154505","https://openalex.org/W4401806721","https://openalex.org/W4403326725"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1],"learning":[2],"(RL),":[3],"which":[4,93,126],"learns":[5],"an":[6,21],"environment":[7],"model":[8,17,57,67,91,115,124,131],"from":[9],"the":[10,25,35,38,48,84,90,99,119,130,141,148,151,158,164,177,196,204],"offline":[11,31,53,109,172],"dataset":[12],"and":[13,40,55,75,83,117,175,195],"generates":[14],"more":[15],"out-of-distribution":[16],"data,":[18,92],"has":[19,176,199],"become":[20],"effective":[22],"approach":[23],"to":[24,34,50,77],"problem":[26],"of":[27,61,123,157,179,185],"distribution":[28,122],"shift":[29],"in":[30,80],"RL.":[32],"Due":[33],"gap":[36],"between":[37,89],"learned":[39,146],"actual":[41],"environment,":[42],"conservatism":[43,60],"should":[44],"be":[45],"incorporated":[46],"into":[47],"algorithm":[49,111],"balance":[51],"accurate":[52],"data":[54,132],"imprecise":[56],"data.":[58],"The":[59,183],"current":[62],"algorithms":[63,194],"mostly":[64],"relies":[65],"on":[66,203],"uncertainty":[68,71],"estimation.":[69],"However,":[70],"estimation":[72],"is":[73,153,166],"unreliable":[74],"leads":[76],"poor":[78],"performance":[79,198],"certain":[81],"scenarios,":[82],"previous":[85,170],"methods":[86],"ignore":[87],"differences":[88],"brings":[94],"great":[95],"conservatism.":[96],"To":[97],"address":[98],"above":[100],"issues,":[101],"this":[102,135],"article":[103],"proposes":[104],"a":[105,154],"mildly":[106],"conservative":[107,168],"model-based":[108,171],"RL":[110,173,193],"(DOMAIN)":[112],"without":[113],"estimating":[114],"uncertainty,":[116],"designs":[118],"adaptive":[120],"sampling":[121],"samples,":[125],"can":[127],"adaptively":[128],"adjust":[129],"penalty.":[133],"In":[134],"article,":[136],"we":[137],"theoretically":[138],"demonstrate":[139],"that":[140,189],"<italic":[142,160],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[143,161],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Q</i>":[144,162],"value":[145],"by":[147,201],"DOMAIN":[149,165,190],"outside":[150],"region":[152],"lower":[155],"bound":[156],"true":[159],"value,":[163],"less":[167],"than":[169],"algorithms,":[174],"guarantee":[178],"safety":[180],"policy":[181],"improvement.":[182],"results":[184],"extensive":[186],"experiments":[187],"show":[188],"outperforms":[191],"prior":[192],"average":[197],"improved":[200],"1.8%":[202],"D4RL":[205],"benchmark.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
