{"id":"https://openalex.org/W3117383065","doi":"https://doi.org/10.23919/eusipco47968.2020.9287440","title":"Exploring State Transition Uncertainty in Variational Reinforcement Learning","display_name":"Exploring State Transition Uncertainty in Variational Reinforcement Learning","publication_year":2020,"publication_date":"2020-12-18","ids":{"openalex":"https://openalex.org/W3117383065","doi":"https://doi.org/10.23919/eusipco47968.2020.9287440","mag":"3117383065"},"language":"en","primary_location":{"id":"doi:10.23919/eusipco47968.2020.9287440","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco47968.2020.9287440","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 28th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061908942","display_name":"Jen\u2010Tzung Chien","orcid":"https://orcid.org/0000-0003-3466-8941"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jen-Tzung Chien","raw_affiliation_strings":["National Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060502020","display_name":"Wei-Lin Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Lin Liao","raw_affiliation_strings":["National Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010597773","display_name":"Issam El Naqa","orcid":"https://orcid.org/0000-0001-6023-1132"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Issam El Naqa","raw_affiliation_strings":["University of Michigan, Ann Arbor, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, USA","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061908942"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":1.0605,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.82712003,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1527","last_page":"1531"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8541042804718018},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.637694239616394},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5421894192695618},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5187156796455383},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.5110543966293335},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5069277882575989},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.48997488617897034},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.47002220153808594},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4478786289691925},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3236468434333801},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2757799029350281},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24044334888458252}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8541042804718018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.637694239616394},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5421894192695618},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5187156796455383},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.5110543966293335},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5069277882575989},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.48997488617897034},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.47002220153808594},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4478786289691925},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3236468434333801},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2757799029350281},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24044334888458252},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/eusipco47968.2020.9287440","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco47968.2020.9287440","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 28th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7599999904632568,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1863227302","https://openalex.org/W1959608418","https://openalex.org/W2120889539","https://openalex.org/W2129703168","https://openalex.org/W2164411961","https://openalex.org/W2192203593","https://openalex.org/W2417786368","https://openalex.org/W2570734388","https://openalex.org/W2739936944","https://openalex.org/W2749679082","https://openalex.org/W2751973545","https://openalex.org/W2766329790","https://openalex.org/W2787666871","https://openalex.org/W2889008527","https://openalex.org/W2951266961","https://openalex.org/W2963523627","https://openalex.org/W2963524109","https://openalex.org/W2963639957","https://openalex.org/W2964121744","https://openalex.org/W2973112754","https://openalex.org/W4300802867","https://openalex.org/W6631190155","https://openalex.org/W6640963894","https://openalex.org/W6660504773","https://openalex.org/W6716474083","https://openalex.org/W6743300111","https://openalex.org/W6754395538","https://openalex.org/W6764214684","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W4225571923","https://openalex.org/W2166117066","https://openalex.org/W3212257828","https://openalex.org/W3087814763","https://openalex.org/W2357975469","https://openalex.org/W2999580272","https://openalex.org/W2136202932","https://openalex.org/W4376605461"],"abstract_inverted_index":{"Model-free":[0],"agent":[1],"in":[2,11,26,83,106],"reinforcement":[3,176],"learning":[4,56],"(RL)":[5],"generally":[6],"performs":[7,168],"well":[8],"but":[9],"inefficient":[10],"training":[12],"process":[13],"with":[14,144],"sparse":[15],"data.":[16],"A":[17,139],"practical":[18],"solution":[19],"is":[20,81,92,116,153],"to":[21,34,95,119,134],"incorporate":[22],"a":[23,54,84,145],"model-based":[24,85],"module":[25],"model-free":[27],"agent.":[28,86],"State":[29],"transition":[30,65,80,111],"can":[31],"be":[32],"learned":[33],"make":[35],"desirable":[36],"prediction":[37],"of":[38,77,101,150],"next":[39],"state":[40,44,79,104],"based":[41,68,128],"on":[42,69,129,156],"current":[43],"and":[45,97,103,137,159],"action":[46],"at":[47],"each":[48],"time":[49],"step.":[50],"This":[51],"paper":[52],"presents":[53],"new":[55],"representation":[57],"for":[58,175],"variational":[59,71],"RL":[60],"by":[61,123],"introducing":[62],"the":[63,70,75,99,120,125,130,148,164,171],"so-called":[64],"uncertainty":[66,76,112,131],"critic":[67],"encoder-decoder":[72],"network":[73],"where":[74],"structured":[78],"encoded":[82],"In":[87],"particular,":[88],"an":[89],"action-gating":[90],"mechanism":[91],"carried":[93],"out":[94],"learn":[96],"decode":[98],"trajectory":[100],"actions":[102],"transitions":[105],"latent":[107,141],"variable":[108,142],"space.":[109],"The":[110],"maximizing":[113],"exploration":[114,173],"(TUME)":[115],"performed":[117],"according":[118],"entropy":[121],"search":[122],"using":[124,147],"intrinsic":[126],"reward":[127],"measure":[132],"corresponding":[133],"different":[135],"states":[136],"actions.":[138],"dedicate":[140],"model":[143],"penalty":[146],"bias":[149],"state-action":[151],"value":[152],"developed.":[154],"Experiments":[155],"Cart":[157],"Pole":[158],"dialogue":[160],"system":[161],"show":[162],"that":[163],"proposed":[165],"TUME":[166],"considerably":[167],"better":[169],"than":[170],"other":[172],"methods":[174],"learning.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
