{"id":"https://openalex.org/W2001525405","doi":"https://doi.org/10.1109/tac.2014.2301558","title":"Online Markov Decision Processes With Kullback\u2013Leibler Control Cost","display_name":"Online Markov Decision Processes With Kullback\u2013Leibler Control Cost","publication_year":2014,"publication_date":"2014-01-31","ids":{"openalex":"https://openalex.org/W2001525405","doi":"https://doi.org/10.1109/tac.2014.2301558","mag":"2001525405"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2014.2301558","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2014.2301558","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071571999","display_name":"Peng Guan","orcid":"https://orcid.org/0000-0002-3670-3950"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Peng Guan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Duke University, Durham, NC, United States of America","Department of Electrical and Computer Engineering, Duke University, Durham, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Duke University, Durham, NC, United States of America","institution_ids":["https://openalex.org/I170897317"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Duke University, Durham, United States of America","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044844161","display_name":"Maxim Raginsky","orcid":"https://orcid.org/0000-0002-5586-9219"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maxim Raginsky","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Coordinated Science Laboratory, Urbana, IL, United States of America","Department of Electrical and Computer Engineering and the Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Coordinated Science Laboratory, Urbana, IL, United States of America","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana, United States of America","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061121471","display_name":"Rebecca Willett","orcid":"https://orcid.org/0000-0002-8109-7582"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rebecca M. Willett","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States of America","Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States of America","institution_ids":["https://openalex.org/I135310074"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, United States of America","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5071571999"],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":7.1469,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.96841507,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"59","issue":"6","first_page":"1423","last_page":"1438"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7129156589508057},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6407352685928345},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.6196227669715881},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5580633282661438},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5562324523925781},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5219611525535583},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5202623605728149},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.4853266775608063},{"id":"https://openalex.org/keywords/kullback\u2013leibler-divergence","display_name":"Kullback\u2013Leibler divergence","score":0.48310112953186035},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.446278840303421},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4301396310329437},{"id":"https://openalex.org/keywords/average-cost","display_name":"Average cost","score":0.42050641775131226},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3438248634338379},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18188261985778809},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1801607608795166},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1306101381778717}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7129156589508057},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6407352685928345},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.6196227669715881},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5580633282661438},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5562324523925781},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5219611525535583},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5202623605728149},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.4853266775608063},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.48310112953186035},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.446278840303421},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4301396310329437},{"id":"https://openalex.org/C2780958618","wikidata":"https://www.wikidata.org/wiki/Q945621","display_name":"Average cost","level":2,"score":0.42050641775131226},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3438248634338379},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18188261985778809},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1801607608795166},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1306101381778717},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C133425853","wikidata":"https://www.wikidata.org/wiki/Q60571","display_name":"Neoclassical economics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tac.2014.2301558","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2014.2301558","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.753.8922","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.753.8922","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://arxiv.org/pdf/1401.3198.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.800.2030","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.800.2030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://maxim.ece.illinois.edu/pubs/guan_raginsky_willett_IEEE-TAC12.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.854.4276","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.854.4276","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://maxim.ece.illinois.edu/pubs/guan_raginsky_willett_ACC12.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.75,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1557517019","https://openalex.org/W1570963478","https://openalex.org/W1599798800","https://openalex.org/W1967037758","https://openalex.org/W1980019479","https://openalex.org/W1984063140","https://openalex.org/W1998498767","https://openalex.org/W1999918998","https://openalex.org/W2000850397","https://openalex.org/W2010654234","https://openalex.org/W2022585556","https://openalex.org/W2026337855","https://openalex.org/W2037197628","https://openalex.org/W2074680702","https://openalex.org/W2077902449","https://openalex.org/W2078059111","https://openalex.org/W2093524643","https://openalex.org/W2103012681","https://openalex.org/W2107662876","https://openalex.org/W2113151258","https://openalex.org/W2125868696","https://openalex.org/W2140282437","https://openalex.org/W2146524676","https://openalex.org/W2155772159","https://openalex.org/W2156211713","https://openalex.org/W2157016390","https://openalex.org/W2167446990","https://openalex.org/W2169401877","https://openalex.org/W2180192350","https://openalex.org/W2182751357","https://openalex.org/W2334782222","https://openalex.org/W2478708596","https://openalex.org/W2522146207","https://openalex.org/W2596585349","https://openalex.org/W2611627047","https://openalex.org/W2796137918","https://openalex.org/W2802074788","https://openalex.org/W2950929549","https://openalex.org/W2990138404","https://openalex.org/W3167142514","https://openalex.org/W4210379104","https://openalex.org/W4233413206","https://openalex.org/W4233696721","https://openalex.org/W4244229143","https://openalex.org/W4245744559","https://openalex.org/W4248175720","https://openalex.org/W4250389103","https://openalex.org/W4253542043","https://openalex.org/W4302617909","https://openalex.org/W4396738162","https://openalex.org/W6650205757","https://openalex.org/W6650723439"],"related_works":["https://openalex.org/W2970347269","https://openalex.org/W4287102143","https://openalex.org/W1850488217","https://openalex.org/W2945119207","https://openalex.org/W3182614517","https://openalex.org/W4388236136","https://openalex.org/W2807018115","https://openalex.org/W4200250224","https://openalex.org/W2285658092","https://openalex.org/W4287863949"],"abstract_inverted_index":{"This":[0],"paper":[1],"considers":[2],"an":[3,10,118],"online":[4,86],"(real-time)":[5],"control":[6,64,185],"problem":[7,90],"that":[8,78,96],"involves":[9],"agent":[11,109],"performing":[12],"a":[13,18,56,59,63,104,124,159,169],"discrete-time":[14],"random":[15],"walk":[16],"over":[17],"finite":[19],"state":[20,38,60,98,113,149],"space.":[21],"The":[22,85],"agent's":[23,74],"action":[24],"at":[25,51],"each":[26,52],"time":[27,53],"step":[28,54],"is":[29,55,91,155],"to":[30,93],"specify":[31],"the":[32,36,40,44,48,68,73,89,94,97,108,111,140,148,162,165],"probability":[33],"distribution":[34,76],"for":[35],"next":[37],"given":[39,66],"current":[41,112],"state.":[42],"Following":[43],"setup":[45],"of":[46,58,88,123,147,161,164,176],"Todorov,":[47],"state-action":[49],"cost":[50,61,65,99,114,138,142,186],"sum":[57],"and":[62,77,107,139],"by":[67,80,103],"Kullback-Leibler":[69],"(KL)":[70],"divergence":[71],"between":[72,134],"next-state":[75],"determined":[79],"some":[81],"fixed":[82],"passive":[83],"dynamics.":[84],"aspect":[87],"due":[92],"fact":[95],"functions":[100],"are":[101,187],"generated":[102],"dynamic":[105],"environment,":[106],"learns":[110],"only":[115],"after":[116],"selecting":[117],"action.":[119],"An":[120],"explicit":[121],"construction":[122],"computationally":[125],"efficient":[126],"strategy":[127,167],"with":[128,158,183],"small":[129],"regret":[130],"(i.e.,":[131],"expected":[132],"difference":[133],"its":[135],"actual":[136],"total":[137],"smallest":[141],"attainable":[143],"using":[144],"noncausal":[145],"knowledge":[146],"costs)":[150],"under":[151],"mild":[152],"regularity":[153],"conditions":[154],"presented,":[156],"along":[157],"demonstration":[160],"performance":[163],"proposed":[166],"on":[168,179],"simulated":[170],"target":[171],"tracking":[172],"problem.":[173],"A":[174],"number":[175],"new":[177],"results":[178],"Markov":[180],"decision":[181],"processes":[182],"KL":[184],"also":[188],"obtained.":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
