{"id":"https://openalex.org/W4403598530","doi":"https://doi.org/10.1109/tac.2024.3484290","title":"Sample-Efficient Reinforcement Learning With Temporal Logic Objectives: Leveraging the Task Specification to Guide Exploration","display_name":"Sample-Efficient Reinforcement Learning With Temporal Logic Objectives: Leveraging the Task Specification to Guide Exploration","publication_year":2024,"publication_date":"2024-10-21","ids":{"openalex":"https://openalex.org/W4403598530","doi":"https://doi.org/10.1109/tac.2024.3484290"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2024.3484290","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3484290","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014874021","display_name":"Yiannis Kantaros","orcid":"https://orcid.org/0000-0002-0257-7378"},"institutions":[{"id":"https://openalex.org/I204465549","display_name":"Washington University in St. Louis","ror":"https://ror.org/01yc7t268","country_code":"US","type":"education","lineage":["https://openalex.org/I204465549"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yiannis Kantaros","raw_affiliation_strings":["Department of Electrical and Systems Engineering, Washington University in St. Louis, St. Louis, MO, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Systems Engineering, Washington University in St. Louis, St. Louis, MO, USA","institution_ids":["https://openalex.org/I204465549"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100384877","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0003-1700-1512"},"institutions":[{"id":"https://openalex.org/I204465549","display_name":"Washington University in St. Louis","ror":"https://ror.org/01yc7t268","country_code":"US","type":"education","lineage":["https://openalex.org/I204465549"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["Department of Electrical and Systems Engineering, Washington University in St. Louis, St. Louis, MO, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Systems Engineering, Washington University in St. Louis, St. Louis, MO, USA","institution_ids":["https://openalex.org/I204465549"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5014874021"],"corresponding_institution_ids":["https://openalex.org/I204465549"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1586327,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"70","issue":"5","first_page":"2873","last_page":"2888"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.8593999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.8593999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8355000019073486,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.7860999703407288,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7801738381385803},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6934553384780884},{"id":"https://openalex.org/keywords/temporal-logic","display_name":"Temporal logic","score":0.6216117739677429},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5998720526695251},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5964985489845276},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48926255106925964},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3535092771053314},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3468776345252991},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1880263388156891},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.16926029324531555},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.15703734755516052}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7801738381385803},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6934553384780884},{"id":"https://openalex.org/C25016198","wikidata":"https://www.wikidata.org/wiki/Q781833","display_name":"Temporal logic","level":2,"score":0.6216117739677429},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5998720526695251},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5964985489845276},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48926255106925964},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3535092771053314},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3468776345252991},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1880263388156891},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.16926029324531555},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.15703734755516052},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2024.3484290","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3484290","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1556387789","https://openalex.org/W2056584142","https://openalex.org/W2059470663","https://openalex.org/W2080813827","https://openalex.org/W2107726111","https://openalex.org/W2150335178","https://openalex.org/W2167185915","https://openalex.org/W2226453522","https://openalex.org/W2487186542","https://openalex.org/W2554009248","https://openalex.org/W2620827312","https://openalex.org/W2624912704","https://openalex.org/W2736666525","https://openalex.org/W2895196950","https://openalex.org/W2914702425","https://openalex.org/W2931553127","https://openalex.org/W2963523627","https://openalex.org/W2963778636","https://openalex.org/W3011250830","https://openalex.org/W3021964239","https://openalex.org/W3026873144","https://openalex.org/W3039276491","https://openalex.org/W3090827750","https://openalex.org/W3092156990","https://openalex.org/W3126402318","https://openalex.org/W3127561923","https://openalex.org/W3175871729","https://openalex.org/W3176904019","https://openalex.org/W4206742989","https://openalex.org/W4214717370","https://openalex.org/W4220689301","https://openalex.org/W4220923746","https://openalex.org/W4295113525","https://openalex.org/W4313016741","https://openalex.org/W4321366759","https://openalex.org/W4382936112","https://openalex.org/W4391022014","https://openalex.org/W6697359928","https://openalex.org/W6738568706","https://openalex.org/W6745497480","https://openalex.org/W6752298494","https://openalex.org/W6756714161","https://openalex.org/W6760985608","https://openalex.org/W6786111744","https://openalex.org/W6797705063","https://openalex.org/W6800089562","https://openalex.org/W6864914272"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W3196817267","https://openalex.org/W1976600725"],"abstract_inverted_index":{"In":[0,96],"this":[1,97],"article,":[2,98],"we":[3,99],"address":[4],"the":[5,32,36,83,88,92,144,147,159,163],"problem":[6],"of":[7,38,146,152],"learning":[8,52],"optimal":[9],"control":[10,19,39,108],"policies":[11,109],"for":[12,55],"systems":[13],"with":[14],"uncertain":[15],"dynamics":[16],"and":[17,35,87,139],"high-level":[18],"objectives":[20],"specified":[21],"as":[22,82,158],"linear":[23],"temporal":[24],"logic":[25],"(LTL)":[26],"formulas.":[27],"Uncertainty":[28],"is":[29],"considered":[30],"in":[31],"workspace":[33],"structure":[34],"outcomes":[37],"decisions":[40],"giving":[41],"rise":[42],"to":[43,132],"an":[44,69,101],"unknown":[45],"Markov":[46],"decision":[47],"process":[48],"(MDP).":[49],"Existing":[50],"reinforcement":[51],"(RL)":[53],"algorithms":[54],"LTL":[56],"tasks":[57],"typically":[58],"rely":[59],"on":[60,118],"exploring":[61],"a":[62,119],"product":[63],"MDP":[64,89,164],"state-space":[65],"uniformly":[66],"(using":[67],"e.g.,":[68],"<inline-formula":[70],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[71],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[72],"notation=\"LaTeX\">$\\epsilon$</tex-math></inline-formula>-greedy":[73],"policy)":[74],"compromising":[75],"sample-efficiency.":[76],"This":[77],"issue":[78],"becomes":[79,155],"more":[80,156],"pronounced":[81],"rewards":[84],"get":[85],"sparser":[86],"size":[90,165],"or":[91,162],"task":[93,133,160],"complexity":[94,161],"increase.":[95],"propose":[100],"accelerated":[102],"RL":[103],"algorithm":[104],"that":[105,124,129],"can":[106],"learn":[107],"significantly":[110],"faster":[111],"than":[112],"competitive":[113],"approaches.":[114],"Its":[115],"sample-efficiency":[116,145],"relies":[117],"novel":[120],"task-driven":[121],"exploration":[122,126],"strategy":[123],"biases":[125],"toward":[127],"directions":[128],"may":[130],"contribute":[131],"satisfaction.":[134],"We":[135],"provide":[136],"theoretical":[137],"analysis":[138],"extensive":[140],"comparative":[141],"experiments":[142],"demonstrating":[143],"proposed":[148],"method.":[149],"The":[150],"benefit":[151],"our":[153],"method":[154],"evident":[157],"increases.":[166]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
