{"id":"https://openalex.org/W7125233049","doi":"https://doi.org/10.48550/arxiv.2601.11890","title":"From Relative Entropy to Minimax: A Unified Framework for Coverage in MDPs","display_name":"From Relative Entropy to Minimax: A Unified Framework for Coverage in MDPs","publication_year":2026,"publication_date":"2026-01-17","ids":{"openalex":"https://openalex.org/W7125233049","doi":"https://doi.org/10.48550/arxiv.2601.11890"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.11890","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.11890","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.11890","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123540865","display_name":"Xihe Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gu, Xihe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072739163","display_name":"Urbashi Mitra","orcid":"https://orcid.org/0000-0002-8896-1177"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mitra, Urbashi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122331649","display_name":"Tara Javidi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Javidi, Tara","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5123540865"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.848800003528595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.848800003528595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.0203000009059906,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.009100000374019146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.61080002784729},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5577999949455261},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5314000248908997},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5210000276565552},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.44359999895095825},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4237000048160553},{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.3856000006198883}],"concepts":[{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.61080002784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5709999799728394},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5577999949455261},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5314000248908997},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5210000276565552},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5055000185966492},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.44359999895095825},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4090999960899353},{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C2775997480","wikidata":"https://www.wikidata.org/wiki/Q586277","display_name":"Degree (music)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.334199994802475},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3325999975204468},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.30799999833106995},{"id":"https://openalex.org/C75782508","wikidata":"https://www.wikidata.org/wiki/Q3333633","display_name":"Cross-entropy method","level":4,"score":0.3068999946117401},{"id":"https://openalex.org/C101721835","wikidata":"https://www.wikidata.org/wiki/Q813908","display_name":"Conditional entropy","level":3,"score":0.2825999855995178},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2750999927520752}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.11890","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.11890","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.11890","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.11890","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.810697078704834,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Targeted":[0],"and":[1,31,46,80],"deliberate":[2],"exploration":[3,37,142],"of":[4,23,49,87,100,103],"state--action":[5,59,112,148],"pairs":[6,19],"is":[7],"essential":[8],"in":[9,154],"reward-free":[10],"Markov":[11],"Decision":[12],"Problems":[13],"(MDPs).":[14],"More":[15],"precisely,":[16],"different":[17,21],"state-action":[18],"exhibit":[20],"degree":[22],"importance":[24],"or":[25],"difficulty":[26],"which":[27],"must":[28],"be":[29],"actively":[30,123],"explicitly":[32],"built":[33],"into":[34],"a":[35,44,70,119,129],"controlled":[36],"strategy.":[38],"To":[39],"this":[40,115],"end,":[41],"we":[42,117,134],"propose":[43],"weighted":[45,77],"parameterized":[47],"family":[48,63],"concave":[50],"coverage":[51,131,152],"objectives,":[52],"denoted":[53],"by":[54],"$U_\u03c1$,":[55],"defined":[56],"directly":[57],"over":[58],"occupancy":[60,127],"measures.":[61],"This":[62],"unifies":[64],"several":[65],"widely":[66],"studied":[67],"objectives":[68],"within":[69],"single":[71],"framework,":[72],"including":[73],"divergence-based":[74],"marginal":[75],"matching,":[76],"average":[78],"coverage,":[79],"worst-case":[81,151],"(minimax)":[82],"coverage.":[83],"While":[84],"the":[85,90,96,101,125,140,146,155],"concavity":[86],"$U_\u03c1$":[88,104],"captures":[89],"diminishing":[91],"return":[92],"associated":[93],"with":[94],"over-exploration,":[95],"simple":[97],"closed":[98],"form":[99],"gradient":[102],"enables":[105],"an":[106],"explicit":[107],"control":[108],"to":[109],"prioritize":[110],"under-explored":[111],"pairs.":[113],"Leveraging":[114],"structure,":[116],"develop":[118],"gradient-based":[120],"algorithm":[121],"that":[122,136],"steers":[124],"induced":[126],"toward":[128],"desired":[130],"pattern.":[132],"Moreover,":[133],"show":[135],"as":[137],"$\u03c1$":[138],"increases,":[139],"resulting":[141],"strategy":[143],"increasingly":[144],"emphasizes":[145],"least-explored":[147],"pairs,":[149],"recovering":[150],"behavior":[153],"limit.":[156]},"counts_by_year":[],"updated_date":"2026-01-22T23:33:04.759266","created_date":"2026-01-22T00:00:00"}
