{"id":"https://openalex.org/W4363620163","doi":"https://doi.org/10.1109/ciss56502.2023.10089655","title":"Information-Directed Policy Search in Sparse-Reward Settings via the Occupancy Information Ratio","display_name":"Information-Directed Policy Search in Sparse-Reward Settings via the Occupancy Information Ratio","publication_year":2023,"publication_date":"2023-03-22","ids":{"openalex":"https://openalex.org/W4363620163","doi":"https://doi.org/10.1109/ciss56502.2023.10089655"},"language":"en","primary_location":{"id":"doi:10.1109/ciss56502.2023.10089655","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ciss56502.2023.10089655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 57th Annual Conference on Information Sciences and Systems (CISS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056599663","display_name":"Wesley A. Suttle","orcid":"https://orcid.org/0000-0003-1234-7151"},"institutions":[{"id":"https://openalex.org/I166416128","display_name":"DEVCOM Army Research Laboratory","ror":"https://ror.org/011hc8f90","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wesley A. Suttle","raw_affiliation_strings":["U.s. Army Research Laboratory,Adelphi,MD,USA","U.s. Army Research Laboratory, Adelphi, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"U.s. Army Research Laboratory,Adelphi,MD,USA","institution_ids":["https://openalex.org/I166416128"]},{"raw_affiliation_string":"U.s. Army Research Laboratory, Adelphi, MD, USA","institution_ids":["https://openalex.org/I166416128"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025896653","display_name":"Alec Koppel","orcid":"https://orcid.org/0000-0003-2447-2873"},"institutions":[{"id":"https://openalex.org/I2802755631","display_name":"Morgan Stanley (United States)","ror":"https://ror.org/00aphdz18","country_code":"US","type":"company","lineage":["https://openalex.org/I2802755631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alec Koppel","raw_affiliation_strings":["J.P&#x2019; Morgan AI Research,New York,NY,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"J.P&#x2019; Morgan AI Research,New York,NY,USA","institution_ids":["https://openalex.org/I2802755631"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100396338","display_name":"Ji Liu","orcid":"https://orcid.org/0000-0003-2871-9888"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ji Liu","raw_affiliation_strings":["Electrical and Computer Engineering Stony Brook University,Stony Brook,NY,USA","Electrical and Computer Engineering Stony Brook University, Stony Brook, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Stony Brook University,Stony Brook,NY,USA","institution_ids":["https://openalex.org/I59553526"]},{"raw_affiliation_string":"Electrical and Computer Engineering Stony Brook University, Stony Brook, NY, USA","institution_ids":["https://openalex.org/I59553526"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03115451,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"99","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6894532442092896},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6534483432769775},{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.6358842253684998},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.49090415239334106},{"id":"https://openalex.org/keywords/information-theory","display_name":"Information theory","score":0.41503429412841797},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40637922286987305},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.37367647886276245},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21729260683059692},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.19186222553253174},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17482757568359375},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11474606394767761}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6894532442092896},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6534483432769775},{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.6358842253684998},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.49090415239334106},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.41503429412841797},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40637922286987305},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.37367647886276245},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21729260683059692},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.19186222553253174},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17482757568359375},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11474606394767761},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ciss56502.2023.10089655","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ciss56502.2023.10089655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 57th Annual Conference on Information Sciences and Systems (CISS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G4261513530","display_name":null,"funder_award_id":"W911NF-21-2-0127,W911NF-22-2-0003","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"},{"id":"https://openalex.org/G8053630600","display_name":"III: Small: Distributed Reinforcement Learning over Complex Networks","funder_award_id":"2230101","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338295","display_name":"Army Research Laboratory","ror":"https://ror.org/011hc8f90"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2094387729","https://openalex.org/W2106164082","https://openalex.org/W2145339207","https://openalex.org/W2156737235","https://openalex.org/W2736601468","https://openalex.org/W2741122588","https://openalex.org/W2938421504","https://openalex.org/W2953326529","https://openalex.org/W2963099939","https://openalex.org/W2973229164","https://openalex.org/W3038915804","https://openalex.org/W3109546547","https://openalex.org/W3214229832","https://openalex.org/W4214717370","https://openalex.org/W4287119221","https://openalex.org/W4320473289","https://openalex.org/W4384448138","https://openalex.org/W6675656252","https://openalex.org/W6742461812","https://openalex.org/W6747473740","https://openalex.org/W6748523217","https://openalex.org/W6757058172","https://openalex.org/W6763002318","https://openalex.org/W6766497270","https://openalex.org/W6776824048","https://openalex.org/W6780386840","https://openalex.org/W6791000347","https://openalex.org/W6791194670","https://openalex.org/W6797142581"],"related_works":["https://openalex.org/W4282043467","https://openalex.org/W2105697914","https://openalex.org/W3093197249","https://openalex.org/W1968324288","https://openalex.org/W1540010871","https://openalex.org/W3023979140","https://openalex.org/W2904068067","https://openalex.org/W1565491139","https://openalex.org/W3177545769","https://openalex.org/W2202433167"],"abstract_inverted_index":{"This":[0],"paper":[1,24,64],"examines":[2],"a":[3],"new":[4],"measure":[5],"of":[6,39,56,62,75],"the":[7,15,23,26,33,40,54,73,76],"exploration/exploitation":[8],"trade-off":[9],"in":[10,87],"reinforcement":[11],"learning":[12],"(RL)":[13],"called":[14],"occupancy":[16],"information":[17],"ratio":[18],"(OIR).":[19],"To":[20],"this":[21,63],"end,":[22],"derives":[25],"Information-Directed":[27],"Actor-Critic":[28],"(IDAC)":[29],"algorithm":[30],"for":[31],"solving":[32],"OIR":[34,47,77],"problem,":[35],"provides":[36],"an":[37],"overview":[38],"rich":[41],"theory":[42],"underlying":[43],"IDAC":[44,79],"and":[45,51],"related":[46],"policy":[48],"gradient":[49],"methods,":[50],"experimentally":[52],"investigates":[53],"advantages":[55],"such":[57],"methods.":[58],"The":[59],"central":[60],"contribution":[61],"is":[65],"to":[66,72],"provide":[67],"empirical":[68],"evidence":[69],"that,":[70],"due":[71],"form":[74],"objective,":[78],"enjoys":[80],"superior":[81],"performance":[82],"over":[83],"vanilla":[84],"RL":[85],"methods":[86],"sparse-reward":[88],"environments.":[89]},"counts_by_year":[],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}
