{"id":"https://openalex.org/W2755631392","doi":"https://doi.org/10.1109/lra.2018.2800085","title":"Sparse Markov Decision Processes With Causal Sparse Tsallis Entropy Regularization for Reinforcement Learning","display_name":"Sparse Markov Decision Processes With Causal Sparse Tsallis Entropy Regularization for Reinforcement Learning","publication_year":2018,"publication_date":"2018-01-31","ids":{"openalex":"https://openalex.org/W2755631392","doi":"https://doi.org/10.1109/lra.2018.2800085","mag":"2755631392"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2018.2800085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2800085","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1709.06293","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100604922","display_name":"Kyungjae Lee","orcid":"https://orcid.org/0000-0003-0147-2715"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Kyungjae Lee","raw_affiliation_strings":["Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]"],"raw_orcid":"https://orcid.org/0000-0003-0147-2715","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047885515","display_name":"Sungjoon Choi","orcid":"https://orcid.org/0000-0002-3049-8212"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sungjoon Choi","raw_affiliation_strings":["Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033764106","display_name":"Songhwai Oh","orcid":"https://orcid.org/0000-0002-9781-2018"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Songhwai Oh","raw_affiliation_strings":["Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]"],"raw_orcid":"https://orcid.org/0000-0002-9781-2018","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering and Automation and Systems Research Institute (ASRI), Seoul National University, Seoul, South Korea]","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100604922"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.5077,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72433426,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"3","issue":"3","first_page":"1466","last_page":"1473"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9663000106811523,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.672808051109314},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6518526077270508},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.6372190713882446},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5222609639167786},{"id":"https://openalex.org/keywords/sparse-approximation","display_name":"Sparse approximation","score":0.5108382105827332},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47179335355758667},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4297303557395935},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.42002588510513306},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33246809244155884},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.32650062441825867},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3087034821510315},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07788434624671936}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.672808051109314},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6518526077270508},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.6372190713882446},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5222609639167786},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.5108382105827332},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47179335355758667},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4297303557395935},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.42002588510513306},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33246809244155884},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.32650062441825867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3087034821510315},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07788434624671936},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/lra.2018.2800085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2800085","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1709.06293","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1709.06293","pdf_url":"https://arxiv.org/pdf/1709.06293","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2755631392","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1709.06293.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1709.06293","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1709.06293","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1709.06293","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1709.06293","pdf_url":"https://arxiv.org/pdf/1709.06293","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7699999809265137,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2755631392.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W41159429","https://openalex.org/W1557517019","https://openalex.org/W1977655452","https://openalex.org/W1983874169","https://openalex.org/W2055501135","https://openalex.org/W2086592650","https://openalex.org/W2147996349","https://openalex.org/W2155968351","https://openalex.org/W2158782408","https://openalex.org/W2666874989","https://openalex.org/W2733312032","https://openalex.org/W4232512588","https://openalex.org/W6638914580","https://openalex.org/W6684921986","https://openalex.org/W6686121877","https://openalex.org/W6687681856","https://openalex.org/W6691766336","https://openalex.org/W6734517396","https://openalex.org/W6736495777","https://openalex.org/W6903351479"],"related_works":["https://openalex.org/W825274868","https://openalex.org/W2964286048","https://openalex.org/W3134155979","https://openalex.org/W2950038553","https://openalex.org/W2985353144","https://openalex.org/W2936582852","https://openalex.org/W3214073196","https://openalex.org/W2939368378","https://openalex.org/W2770040247","https://openalex.org/W3119228957","https://openalex.org/W2950771634","https://openalex.org/W3081892413","https://openalex.org/W1995249482","https://openalex.org/W1965193428","https://openalex.org/W1783546945","https://openalex.org/W2554489331","https://openalex.org/W3037347970","https://openalex.org/W2116805437","https://openalex.org/W2963297739","https://openalex.org/W2951951813"],"abstract_inverted_index":{"In":[0,139],"this":[1,129],"letter,":[2],"a":[3,23,31,52,58,65,105,109,116],"sparse":[4,12,24,32,41,53,59,66,76,86,106,143],"Markov":[5],"decision":[6],"process":[7],"(MDP)":[8],"with":[9,121],"novel":[10],"causal":[11,95],"Tsallis":[13],"entropy":[14,96],"regularization":[15,21,137],"is":[16,43,88,132],"proposed.":[17],"The":[18,34,84,149],"proposed":[19,40,85,150],"policy":[20,28],"induces":[22],"and":[25,68,73,161],"multimodal":[26],"optimal":[27],"distribution":[29],"of":[30,38,51,75,104,115,126,157],"MDP.":[33,54],"full":[35],"mathematical":[36],"analysis":[37],"the":[39,48,71,80,101,113,124,135,158],"MDP":[42,67,87,107,118],"provided.":[44],"We":[45,98],"first":[46],"analyze":[47],"optimality":[49,74],"condition":[50],"Then,":[55],"we":[56,141],"propose":[57],"value":[60,77],"iteration":[61,78],"method":[62,151],"that":[63,93,100],"solves":[64],"then":[69],"prove":[70],"convergence":[72,159],"using":[79],"Banach":[81],"fixed-point":[82],"theorem.":[83],"compared":[89],"to":[90,123,145],"soft":[91,117],"MDPs":[92,144],"utilize":[94],"regularization.":[97],"show":[99],"performance":[102,130],"error":[103,114,131],"has":[108],"constant":[110],"bound,":[111],"while":[112],"increases":[119],"logarithmically":[120],"respect":[122],"number":[125],"actions,":[127],"where":[128],"caused":[133],"by":[134],"introduced":[136],"term.":[138],"experiments,":[140],"apply":[142],"reinforcement":[146],"learning":[147],"problems.":[148],"outperforms":[152],"existing":[153],"methods":[154],"in":[155],"terms":[156],"speed":[160],"performance.":[162]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
