{"id":"https://openalex.org/W2151702863","doi":"https://doi.org/10.1109/adprl.2011.5967358","title":"Agent self-assessment: Determining policy quality without execution","display_name":"Agent self-assessment: Determining policy quality without execution","publication_year":2011,"publication_date":"2011-04-01","ids":{"openalex":"https://openalex.org/W2151702863","doi":"https://doi.org/10.1109/adprl.2011.5967358","mag":"2151702863"},"language":"en","primary_location":{"id":"doi:10.1109/adprl.2011.5967358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2011.5967358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006023562","display_name":"Alexander Hans","orcid":null},"institutions":[{"id":"https://openalex.org/I119449181","display_name":"Technische Universit\u00e4t Ilmenau","ror":"https://ror.org/01weqhp73","country_code":"DE","type":"education","lineage":["https://openalex.org/I119449181"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Alexander Hans","raw_affiliation_strings":["Neuroinformatics and Cognitive Robotics Laboratory, Ilmenau University of Technology, Ilmenau, Germany"],"affiliations":[{"raw_affiliation_string":"Neuroinformatics and Cognitive Robotics Laboratory, Ilmenau University of Technology, Ilmenau, Germany","institution_ids":["https://openalex.org/I119449181"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036480445","display_name":"Siegmund Duell","orcid":null},"institutions":[{"id":"https://openalex.org/I4210148503","display_name":"Fraunhofer Institute for Production Systems and Design Technology","ror":"https://ror.org/045eg9c12","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210148503","https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Siegmund Duell","raw_affiliation_strings":["Machine Learning Group, Berlin Institute of Technology, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Machine Learning Group, Berlin Institute of Technology, Berlin, Germany","institution_ids":["https://openalex.org/I4210148503","https://openalex.org/I4577782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035246650","display_name":"Steffen Udluft","orcid":"https://orcid.org/0000-0002-5767-2591"},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Steffen Udluft","raw_affiliation_strings":["Intelligent Systems and Control, Siemens AG, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Intelligent Systems and Control, Siemens AG, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006023562"],"corresponding_institution_ids":["https://openalex.org/I119449181"],"apc_list":null,"apc_paid":null,"fwci":0.4386,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.74841093,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"84","last_page":"90"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9708999991416931,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8477392196655273},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8251438140869141},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8180657029151917},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.655879020690918},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5831164121627808},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5435492992401123},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4750675857067108},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.43567243218421936},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.42758649587631226},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3686283230781555},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2349839210510254},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.18018117547035217},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.16763073205947876}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8477392196655273},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8251438140869141},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8180657029151917},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.655879020690918},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5831164121627808},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5435492992401123},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4750675857067108},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.43567243218421936},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.42758649587631226},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3686283230781555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2349839210510254},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18018117547035217},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.16763073205947876},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/adprl.2011.5967358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2011.5967358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.710.4631","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.710.4631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.tu-ilmenau.de/fileadmin/media/neurob/publications/conferences_int/2011/Hans-ADPRL-2011-fin.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W142764021","https://openalex.org/W166862392","https://openalex.org/W215404843","https://openalex.org/W1496855202","https://openalex.org/W1576516403","https://openalex.org/W1578862674","https://openalex.org/W2006278461","https://openalex.org/W2009696375","https://openalex.org/W2093652578","https://openalex.org/W2098874357","https://openalex.org/W2120346334","https://openalex.org/W2121863487","https://openalex.org/W2129426113","https://openalex.org/W2142012891","https://openalex.org/W2143770247","https://openalex.org/W2160308170","https://openalex.org/W2341171179","https://openalex.org/W2586680856","https://openalex.org/W2593895158","https://openalex.org/W4214717370","https://openalex.org/W4285719527","https://openalex.org/W6605834550","https://openalex.org/W6606719070","https://openalex.org/W6629614444","https://openalex.org/W6677737365","https://openalex.org/W6683472505","https://openalex.org/W6704298589"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698"],"abstract_inverted_index":{"With":[0],"the":[1,24,48,62,68,87,91,95,121,144,157,166],"development":[2],"of":[3,16,26,97,171],"data-efficient":[4],"reinforcement":[5],"learning":[6],"(RL)":[7],"methods,":[8],"a":[9,29,38,103,130,169],"promising":[10],"data-driven":[11],"solution":[12],"for":[13],"optimal":[14],"control":[15],"complex":[17],"technical":[18,30],"systems":[19],"has":[20],"become":[21],"available.":[22],"For":[23],"application":[25,96],"RL":[27,98],"to":[28,36,44,66,85,99,119,128],"system,":[31],"it":[32,43,46],"is":[33],"usually":[34],"required":[35,53],"evaluate":[37,86,165],"policy":[39,69,88],"before":[40],"actually":[41],"applying":[42],"ensure":[45],"operates":[47],"system":[49,63,93],"safely":[50],"and":[51,133,164],"within":[52],"performance":[54],"bounds.":[55],"In":[56,71],"benchmark":[57,172],"applications":[58],"one":[59],"can":[60,148],"use":[61,120],"dynamics":[64],"directly":[65],"measure":[67],"quality.":[70],"real":[72],"applications,":[73],"however,":[74],"this":[75,115,153],"might":[76],"be":[77,149],"too":[78],"expensive":[79],"or":[80],"even":[81],"impossible.":[82],"Being":[83],"unable":[84],"without":[89],"using":[90,168],"actual":[92],"hinders":[94],"autonomous":[100],"controllers.":[101],"As":[102],"first":[104],"step":[105],"toward":[106],"agent":[107],"self-assessment,":[108],"we":[109],"deal":[110],"with":[111,125,138],"discrete":[112],"MDPs":[113],"in":[114],"paper.":[116],"We":[117,151],"propose":[118],"value":[122,145,158],"function":[123,146],"along":[124],"its":[126],"uncertainty":[127,160,162],"assess":[129],"policy's":[131],"quality":[132],"show":[134],"that,":[135],"when":[136],"dealing":[137],"an":[139],"MDP":[140],"estimated":[141],"from":[142],"observations,":[143],"itself":[147],"misleading.":[150],"address":[152],"problem":[154],"by":[155],"determining":[156],"function's":[159],"through":[161],"propagation":[163],"approach":[167],"number":[170],"applications.":[173]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
