{"id":"https://openalex.org/W2142012891","doi":"https://doi.org/10.1109/ijcnn.2008.4634160","title":"Uncertainty propagation for quality assurance in Reinforcement Learning","display_name":"Uncertainty propagation for quality assurance in Reinforcement Learning","publication_year":2008,"publication_date":"2008-06-01","ids":{"openalex":"https://openalex.org/W2142012891","doi":"https://doi.org/10.1109/ijcnn.2008.4634160","mag":"2142012891"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2008.4634160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2008.4634160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072736085","display_name":"Daniel Schneega\u00df","orcid":null},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Daniel Schneegass","raw_affiliation_strings":["Learning Systems Department of the Information & Communications Corporate Technology, Siemens AG","Corp. Technol., Inf. & Commun., Learning Syst. Dept., Siemens AG, Munich"],"affiliations":[{"raw_affiliation_string":"Learning Systems Department of the Information & Communications Corporate Technology, Siemens AG","institution_ids":["https://openalex.org/I1325886976"]},{"raw_affiliation_string":"Corp. Technol., Inf. & Commun., Learning Syst. Dept., Siemens AG, Munich","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035246650","display_name":"Steffen Udluft","orcid":"https://orcid.org/0000-0002-5767-2591"},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Steffen Udluft","raw_affiliation_strings":["Learning Systems Department of the Information & Communications Corporate Technology, Siemens AG","Corp. Technol., Inf. & Commun., Learning Syst. Dept., Siemens AG, Munich"],"affiliations":[{"raw_affiliation_string":"Learning Systems Department of the Information & Communications Corporate Technology, Siemens AG","institution_ids":["https://openalex.org/I1325886976"]},{"raw_affiliation_string":"Corp. Technol., Inf. & Commun., Learning Syst. Dept., Siemens AG, Munich","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014856404","display_name":"Thomas Martinetz","orcid":"https://orcid.org/0000-0002-4539-4475"},"institutions":[{"id":"https://openalex.org/I9341345","display_name":"University of L\u00fcbeck","ror":"https://ror.org/00t3r8h32","country_code":"DE","type":"education","lineage":["https://openalex.org/I9341345"]},{"id":"https://openalex.org/I4210092939","display_name":"Institute for Integrative and Experimental Genomics","ror":"https://ror.org/00fy7kr67","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210092939"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas Martinetz","raw_affiliation_strings":["Institute for Neuro-and Bioinformatics, University of Luebeck, Germany","Inst. for Neuro- & Bioinf., Univ. of Lubeck, Lubeck"],"affiliations":[{"raw_affiliation_string":"Institute for Neuro-and Bioinformatics, University of Luebeck, Germany","institution_ids":["https://openalex.org/I9341345","https://openalex.org/I4210092939"]},{"raw_affiliation_string":"Inst. for Neuro- & Bioinf., Univ. of Lubeck, Lubeck","institution_ids":["https://openalex.org/I9341345"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072736085"],"corresponding_institution_ids":["https://openalex.org/I1325886976"],"apc_list":null,"apc_paid":null,"fwci":2.3415,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90353387,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"19","issue":null,"first_page":"2588","last_page":"2595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8492588400840759},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6800976991653442},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6673189401626587},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.633317232131958},{"id":"https://openalex.org/keywords/propagation-of-uncertainty","display_name":"Propagation of uncertainty","score":0.624904215335846},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5425382852554321},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.5295582413673401},{"id":"https://openalex.org/keywords/quality-assurance","display_name":"Quality assurance","score":0.5102477073669434},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.48848140239715576},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.474265456199646},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4691649377346039},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.46631038188934326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3740580081939697},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1916584074497223},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1425093412399292},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12081331014633179},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11442810297012329},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.09833380579948425}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8492588400840759},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6800976991653442},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6673189401626587},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.633317232131958},{"id":"https://openalex.org/C123614077","wikidata":"https://www.wikidata.org/wiki/Q1364905","display_name":"Propagation of uncertainty","level":2,"score":0.624904215335846},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5425382852554321},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.5295582413673401},{"id":"https://openalex.org/C106436119","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assurance","level":3,"score":0.5102477073669434},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48848140239715576},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.474265456199646},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4691649377346039},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.46631038188934326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3740580081939697},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1916584074497223},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1425093412399292},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12081331014633179},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11442810297012329},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.09833380579948425},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C2778618615","wikidata":"https://www.wikidata.org/wiki/Q4008393","display_name":"External quality assessment","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn.2008.4634160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2008.4634160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.629.329","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.629.329","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.inb.uni-luebeck.de/publikationen/pdfs/ScUdMa08.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6100000143051147,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1496855202","https://openalex.org/W1508229132","https://openalex.org/W1552327263","https://openalex.org/W1557941966","https://openalex.org/W1576452626","https://openalex.org/W1607198972","https://openalex.org/W1730555343","https://openalex.org/W1748709110","https://openalex.org/W1979740015","https://openalex.org/W2006278461","https://openalex.org/W2058066080","https://openalex.org/W2100785108","https://openalex.org/W2104753538","https://openalex.org/W2116459397","https://openalex.org/W2121863487","https://openalex.org/W2129426113","https://openalex.org/W2132849848","https://openalex.org/W2151268438","https://openalex.org/W2156974606","https://openalex.org/W2161521419","https://openalex.org/W2164569010","https://openalex.org/W2312609093","https://openalex.org/W2334782222","https://openalex.org/W3140968660","https://openalex.org/W4205326910","https://openalex.org/W4214717370","https://openalex.org/W4231318850","https://openalex.org/W4242606736","https://openalex.org/W4285719527","https://openalex.org/W6629614444","https://openalex.org/W6637597983","https://openalex.org/W6675278435","https://openalex.org/W6679599195","https://openalex.org/W6683639705"],"related_works":["https://openalex.org/W4287880334","https://openalex.org/W4306904969","https://openalex.org/W4366700029","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698"],"abstract_inverted_index":{"In":[0,69],"this":[1,123],"paper":[2],"we":[3,73,121],"address":[4],"the":[5,31,38,41,45,50,59,67,75,84,94,114],"reliability":[6],"of":[7,17,40,96,113],"policies":[8],"derived":[9,32],"by":[10,27],"Reinforcement":[11],"Learning":[12],"on":[13,99,125],"a":[14,24,70,81,89],"limited":[15],"amount":[16],"observations.":[18],"This":[19],"can":[20,117],"be":[21,118],"done":[22],"in":[23],"principled":[25],"manner":[26],"taking":[28],"into":[29],"account":[30],"Q-functionpsilas":[33],"uncertainty,":[34],"which":[35],"stems":[36],"from":[37],"uncertainty":[39,55],"estimators":[42],"used":[43],"for":[44,66,105],"MDPpsilas":[46],"transition":[47],"probabilities":[48],"and":[49,62,102],"reward":[51],"function.":[52],"We":[53,92],"apply":[54],"propagation":[56],"parallelly":[57],"to":[58,79,128],"Bellman":[60,76],"iteration":[61],"achieve":[63,80],"confidence":[64],"intervals":[65],"Q-function.":[68],"second":[71],"step":[72],"change":[74],"operator":[77],"as":[78],"policy":[82],"guaranteeing":[83],"highest":[85],"minimum":[86],"performance":[87,116],"with":[88],"given":[90],"probability.":[91],"demonstrate":[93],"functionality":[95],"our":[97],"method":[98],"artificial":[100],"examples":[101],"show":[103],"that,":[104],"an":[106,111,126],"important":[107],"problem":[108],"class":[109],"even":[110],"enhancement":[112],"expected":[115],"obtained.":[119],"Finally":[120],"verify":[122],"observation":[124],"application":[127],"gas":[129],"turbine":[130],"control.":[131]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
