{"id":"https://openalex.org/W3201108120","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534062","title":"Trust, but Verify: Alleviating Pessimistic Errors in Model-Based Exploration","display_name":"Trust, but Verify: Alleviating Pessimistic Errors in Model-Based Exploration","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3201108120","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534062","mag":"3201108120"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9534062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087092802","display_name":"Konrad Czechowski","orcid":"https://orcid.org/0000-0002-9384-1137"},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Konrad Czechowski","raw_affiliation_strings":["Faculty of Mathematics, Informatics and Mechanics, University of Warsaw"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Informatics and Mechanics, University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043388455","display_name":"Tomasz Odrzyg\u00f3\u017ad\u017a","orcid":null},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Tomasz Odrzygozdz","raw_affiliation_strings":["Faculty of Mathematics, Informatics and Mechanics, University of Warsaw"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Informatics and Mechanics, University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090792727","display_name":"Micha\u0142 Izworski","orcid":null},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Michal Izworski","raw_affiliation_strings":["Faculty of Mathematics, Informatics and Mechanics, University of Warsaw"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Informatics and Mechanics, University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037418059","display_name":"Marek Zbysi\u0144ski","orcid":null},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Marek Zbysinski","raw_affiliation_strings":["Faculty of Mathematics, Informatics and Mechanics, University of Warsaw"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Informatics and Mechanics, University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055195176","display_name":"\u0141ukasz Kuci\u0144ski","orcid":null},"institutions":[{"id":"https://openalex.org/I4210120266","display_name":"Czech Academy of Sciences, Institute of Mathematics","ror":"https://ror.org/02tv1yf50","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210120266"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Lukasz Kucinski","raw_affiliation_strings":["Institute of Mathematics, Polish Academy of Science"],"affiliations":[{"raw_affiliation_string":"Institute of Mathematics, Polish Academy of Science","institution_ids":["https://openalex.org/I4210120266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089671067","display_name":"Piotr Mi\u0142o\u015b","orcid":null},"institutions":[{"id":"https://openalex.org/I4210120266","display_name":"Czech Academy of Sciences, Institute of Mathematics","ror":"https://ror.org/02tv1yf50","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210120266"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Piotr Milos","raw_affiliation_strings":["Institute of Mathematics, Polish Academy of Science"],"affiliations":[{"raw_affiliation_string":"Institute of Mathematics, Polish Academy of Science","institution_ids":["https://openalex.org/I4210120266"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5087092802"],"corresponding_institution_ids":["https://openalex.org/I4654613"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13340429,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"87","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7126266360282898},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6642481088638306},{"id":"https://openalex.org/keywords/pessimism","display_name":"Pessimism","score":0.6479687690734863},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.5705918669700623},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.531897783279419},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4872843027114868},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.422495573759079},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3302140235900879},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.28671735525131226}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7126266360282898},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6642481088638306},{"id":"https://openalex.org/C9992130","wikidata":"https://www.wikidata.org/wiki/Q484954","display_name":"Pessimism","level":2,"score":0.6479687690734863},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.5705918669700623},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.531897783279419},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4872843027114868},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.422495573759079},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3302140235900879},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28671735525131226},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9534062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6824486362","display_name":null,"funder_award_id":"UMO-2017/26/E/ST6/00622","funder_id":"https://openalex.org/F4320335039","funder_display_name":"Narodowe Centrum Bada\u0144 i Rozwoju"}],"funders":[{"id":"https://openalex.org/F4320326815","display_name":"Infrastruktura PL-Grid","ror":null},{"id":"https://openalex.org/F4320335039","display_name":"Narodowe Centrum Bada\u0144 i Rozwoju","ror":"https://ror.org/05pwfyy15"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":112,"referenced_works":["https://openalex.org/W183472599","https://openalex.org/W1509593372","https://openalex.org/W1583155004","https://openalex.org/W1969483458","https://openalex.org/W1980035368","https://openalex.org/W2034806191","https://openalex.org/W2039522160","https://openalex.org/W2056740053","https://openalex.org/W2121863487","https://openalex.org/W2122410182","https://openalex.org/W2126316555","https://openalex.org/W2126677653","https://openalex.org/W2140365369","https://openalex.org/W2168405694","https://openalex.org/W2280163991","https://openalex.org/W2417786368","https://openalex.org/W2596982695","https://openalex.org/W2738669288","https://openalex.org/W2766447205","https://openalex.org/W2785389871","https://openalex.org/W2789824229","https://openalex.org/W2795158776","https://openalex.org/W2807588596","https://openalex.org/W2810754397","https://openalex.org/W2890346000","https://openalex.org/W2892230114","https://openalex.org/W2898585858","https://openalex.org/W2902907165","https://openalex.org/W2920362155","https://openalex.org/W2922212713","https://openalex.org/W2947096529","https://openalex.org/W2949571867","https://openalex.org/W2953708620","https://openalex.org/W2953772919","https://openalex.org/W2960567166","https://openalex.org/W2962723954","https://openalex.org/W2962865140","https://openalex.org/W2962872206","https://openalex.org/W2962893049","https://openalex.org/W2963103574","https://openalex.org/W2963160877","https://openalex.org/W2963276097","https://openalex.org/W2963355572","https://openalex.org/W2963523627","https://openalex.org/W2963639957","https://openalex.org/W2963820385","https://openalex.org/W2963846183","https://openalex.org/W2963938771","https://openalex.org/W2964067469","https://openalex.org/W2964295739","https://openalex.org/W2970277495","https://openalex.org/W2970720334","https://openalex.org/W2971331890","https://openalex.org/W2974778612","https://openalex.org/W2989321433","https://openalex.org/W2991934429","https://openalex.org/W2993335716","https://openalex.org/W2994714051","https://openalex.org/W2995976796","https://openalex.org/W2996558468","https://openalex.org/W2997289589","https://openalex.org/W3012148463","https://openalex.org/W3025660841","https://openalex.org/W3040321451","https://openalex.org/W3041764008","https://openalex.org/W3100366369","https://openalex.org/W3123212791","https://openalex.org/W3172115140","https://openalex.org/W4214717370","https://openalex.org/W4234228486","https://openalex.org/W4246219036","https://openalex.org/W4287779179","https://openalex.org/W4287991549","https://openalex.org/W4288319859","https://openalex.org/W4288331462","https://openalex.org/W4293396018","https://openalex.org/W4297703537","https://openalex.org/W4297800542","https://openalex.org/W4321428174","https://openalex.org/W6630399036","https://openalex.org/W6678592152","https://openalex.org/W6695011786","https://openalex.org/W6716474083","https://openalex.org/W6717230150","https://openalex.org/W6735033012","https://openalex.org/W6735939104","https://openalex.org/W6745417605","https://openalex.org/W6748519856","https://openalex.org/W6748573137","https://openalex.org/W6748817170","https://openalex.org/W6749504895","https://openalex.org/W6752244597","https://openalex.org/W6753264383","https://openalex.org/W6754160304","https://openalex.org/W6754471908","https://openalex.org/W6755864697","https://openalex.org/W6756192075","https://openalex.org/W6756303580","https://openalex.org/W6760373680","https://openalex.org/W6760405395","https://openalex.org/W6762863188","https://openalex.org/W6763264829","https://openalex.org/W6764053384","https://openalex.org/W6764173040","https://openalex.org/W6764980988","https://openalex.org/W6767370964","https://openalex.org/W6770771444","https://openalex.org/W6771066560","https://openalex.org/W6774948183","https://openalex.org/W6778000925","https://openalex.org/W6780571964","https://openalex.org/W6780848903"],"related_works":["https://openalex.org/W4380987628","https://openalex.org/W2418537576","https://openalex.org/W2557514562","https://openalex.org/W214945085","https://openalex.org/W1987935396","https://openalex.org/W3126025002","https://openalex.org/W2354456418","https://openalex.org/W3197683035","https://openalex.org/W2374935031","https://openalex.org/W2034131677"],"abstract_inverted_index":{"We":[0,32,67],"propose":[1],"trust-but-verify":[2],"(TBV)":[3],"mechanism,":[4],"a":[5],"new":[6],"method":[7],"which":[8,40,46,94],"uses":[9],"model":[10,38],"uncertainty":[11],"estimates":[12],"to":[13,26,97],"guide":[14],"exploration.":[15,66],"The":[16],"mechanism":[17],"augments":[18],"graph":[19,51],"search":[20,52],"planning":[21],"algorithms":[22,53],"with":[23,28,81],"the":[24],"capacity":[25],"deal":[27],"learned":[29],"model's":[30],"imperfections.":[31],"identify":[33],"certain":[34],"type":[35],"of":[36],"frequent":[37],"errors,":[39],"we":[41],"dub":[42],"false":[43],"loops,":[44],"and":[45,63,71],"are":[47],"particularly":[48],"dangerous":[49],"for":[50],"in":[54],"discrete":[55],"environments.":[56],"These":[57],"errors":[58],"impose":[59],"falsely":[60],"pessimistic":[61],"expectations":[62],"thus":[64],"hinder":[65],"confirm":[68],"this":[69],"experimentally":[70],"show":[72],"that":[73],"TBV":[74,79],"can":[75],"effectively":[76],"alleviate":[77],"them.":[78],"combined":[80],"MCTS":[82],"or":[83],"Best":[84],"First":[85],"Search":[86],"forms":[87],"an":[88],"effective":[89],"model-based":[90],"reinforcement":[91],"learning":[92],"solution,":[93],"is":[95],"able":[96],"robustly":[98],"solve":[99],"sparse":[100],"reward":[101],"problems.":[102]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
