{"id":"https://openalex.org/W4392477589","doi":"https://doi.org/10.1109/tai.2024.3372939","title":"Model-Based Offline Reinforcement Learning With Uncertainty Estimation and Policy Constraint","display_name":"Model-Based Offline Reinforcement Learning With Uncertainty Estimation and Policy Constraint","publication_year":2024,"publication_date":"2024-03-05","ids":{"openalex":"https://openalex.org/W4392477589","doi":"https://doi.org/10.1109/tai.2024.3372939"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2024.3372939","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3372939","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jin Zhu","orcid":"https://orcid.org/0000-0002-6038-4339"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jin Zhu","raw_affiliation_strings":["Department of Automation, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chunhui Du","orcid":"https://orcid.org/0009-0003-4562-4941"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunhui Du","raw_affiliation_strings":["Department of Automation, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061952552","display_name":"Geir E. Dullerud","orcid":"https://orcid.org/0000-0003-0596-6050"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Geir E. Dullerud","raw_affiliation_strings":["Department of Mechanical Science and Engineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Science and Engineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":2.2598,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.87373517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"5","issue":"12","first_page":"6066","last_page":"6079"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.8830000162124634,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.8830000162124634,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.8748000264167786,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.7656999826431274,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7847387790679932},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6112222075462341},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5661026239395142},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5576168894767761},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.468593031167984},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4308745265007019},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.381553053855896},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.34705978631973267},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.3207141160964966},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.21093040704727173},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.196891188621521},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10936453938484192},{"id":"https://openalex.org/keywords/management","display_name":"Management","score":0.05287906527519226},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.0504833459854126}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7847387790679932},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6112222075462341},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5661026239395142},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5576168894767761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.468593031167984},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4308745265007019},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.381553053855896},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.34705978631973267},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3207141160964966},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.21093040704727173},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.196891188621521},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10936453938484192},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.05287906527519226},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0504833459854126},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2024.3372939","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3372939","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5653458562","display_name":null,"funder_award_id":"2008085MF198","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"}],"funders":[{"id":"https://openalex.org/F4320334897","display_name":"Natural Science Foundation of Anhui Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1491843047","https://openalex.org/W2964053787","https://openalex.org/W3162079807","https://openalex.org/W3183420623","https://openalex.org/W3210873862","https://openalex.org/W3213490051","https://openalex.org/W3216656735","https://openalex.org/W4283797777","https://openalex.org/W4317038451","https://openalex.org/W4322576602","https://openalex.org/W4403326725","https://openalex.org/W6637967152","https://openalex.org/W6683161245","https://openalex.org/W6688325169","https://openalex.org/W6747473740","https://openalex.org/W6748519856","https://openalex.org/W6748839928","https://openalex.org/W6751030608","https://openalex.org/W6751494529","https://openalex.org/W6754471908","https://openalex.org/W6757469721","https://openalex.org/W6763704811","https://openalex.org/W6764053384","https://openalex.org/W6771270455","https://openalex.org/W6776601253","https://openalex.org/W6777091672","https://openalex.org/W6777656069","https://openalex.org/W6779265984","https://openalex.org/W6787971688","https://openalex.org/W6790458829","https://openalex.org/W6791413555","https://openalex.org/W6795014841","https://openalex.org/W6796589144","https://openalex.org/W6801801719","https://openalex.org/W6802659552","https://openalex.org/W6803888866","https://openalex.org/W6810936812","https://openalex.org/W6839908325","https://openalex.org/W6840437412","https://openalex.org/W6845414265","https://openalex.org/W6846274233","https://openalex.org/W6848761080"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656","https://openalex.org/W2072376847"],"abstract_inverted_index":{"Explicit":[0],"uncertainty":[1,30,99,119],"estimation":[2,90,100],"is":[3,53,127,170],"an":[4],"effective":[5],"method":[6,25,126,145],"for":[7,118],"addressing":[8],"the":[9,20,35,56,69,84,131,154,158,166],"overestimation":[10],"problem":[11],"caused":[12],"by":[13,55,68],"distribution":[14],"shifts":[15],"in":[16],"offline":[17,38,43,46,175],"RL.":[18,39],"However,":[19],"common":[21],"bootstrapped":[22],"ensemble":[23,111,115],"network":[24],"fails":[26],"to":[27,96,110],"obtain":[28,97],"reliable":[29,98],"estimation,":[31],"which":[32,73],"will":[33,65,74],"decrease":[34],"performance":[36],"of":[37,62],"Compared":[40],"with":[41,88,173],"model-free":[42],"RL,":[44],"model-based":[45],"RL":[47,87,176],"provides":[48],"better":[49],"generalizability":[50],"although":[51],"it":[52],"limited":[54],"model-bias":[57],"problem.":[58],"The":[59],"adverse":[60],"effects":[61],"model":[63],"bias":[64],"be":[66],"aggravated":[67],"state":[70,103,149],"mismatch":[71],"phenomenon":[72],"ultimately":[75],"disrupt":[76],"policy":[77,124,137],"learning.":[78],"In":[79],"this":[80],"paper,":[81],"we":[82,106,140,152],"propose":[83,114],"Model-based":[85],"Offline":[86],"Uncertainty":[89],"and":[91,101,113,139],"Policy":[92],"constraint":[93,125,135],"(MOUP)":[94],"algorithm":[95,156,169],"bounded":[102,148],"mismatch.":[104,150],"Firstly,":[105],"introduce":[107],"MC":[108],"dropout":[109,116],"networks":[112,117],"estimation.":[120],"Secondly,":[121],"a":[122,144],"novel":[123],"given":[128],"that":[129,142,165],"incorporates":[130],"maximum":[132],"mean":[133],"discrepancy":[134],"into":[136],"optimization,":[138],"prove":[141],"such":[143],"can":[146],"generate":[147],"Finally,":[151],"evaluate":[153],"MOUP":[155,168],"on":[157],"MuJoCo":[159],"control":[160],"toolkit.":[161],"Experimental":[162],"results":[163],"show":[164],"proposed":[167],"competitive":[171],"compared":[172],"existing":[174],"algorithms.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
