{"id":"https://openalex.org/W4396712964","doi":"https://doi.org/10.1109/lcsys.2024.3397844","title":"Tractable Reinforcement Learning for Signal Temporal Logic Tasks With Counterfactual Experience Replay","display_name":"Tractable Reinforcement Learning for Signal Temporal Logic Tasks With Counterfactual Experience Replay","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4396712964","doi":"https://doi.org/10.1109/lcsys.2024.3397844"},"language":"en","primary_location":{"id":"doi:10.1109/lcsys.2024.3397844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2024.3397844","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102730865","display_name":"Siqi Wang","orcid":"https://orcid.org/0000-0003-3186-7785"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Siqi Wang","raw_affiliation_strings":["Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032657663","display_name":"Xunyuan Yin","orcid":"https://orcid.org/0000-0002-9823-9209"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Xunyuan Yin","raw_affiliation_strings":["Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","School of Chemistry, Chemical Engineering and Biotechnology, Nanyang Technological University, Jurong West, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Chemistry, Chemical Engineering and Biotechnology, Nanyang Technological University, Jurong West, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041126305","display_name":"Shaoyuan Li","orcid":"https://orcid.org/0000-0003-3427-2912"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaoyuan Li","raw_affiliation_strings":["Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034304769","display_name":"Xiang Yin","orcid":"https://orcid.org/0000-0003-1944-1570"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Xiang Yin","raw_affiliation_strings":["Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","School of Chemistry, Chemical Engineering and Biotechnology, Nanyang Technological University, Jurong West, Singapore","Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Automation and the Key Laboratory of System Control and Information Processing, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Chemistry, Chemical Engineering and Biotechnology, Nanyang Technological University, Jurong West, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102730865"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73756917,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"8","issue":null,"first_page":"616","last_page":"621"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.879057765007019},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.8762711882591248},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7937198281288147},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7516893148422241},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7298548221588135},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5544711351394653},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5484970808029175},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5302668809890747},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5207069516181946},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4970698654651642},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4681437611579895},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.46411922574043274},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4136989712715149},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41346466541290283},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.34697702527046204},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20339414477348328},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08518615365028381},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08317512273788452}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.879057765007019},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.8762711882591248},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7937198281288147},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7516893148422241},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7298548221588135},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5544711351394653},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5484970808029175},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5302668809890747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5207069516181946},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4970698654651642},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4681437611579895},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46411922574043274},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4136989712715149},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41346466541290283},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.34697702527046204},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20339414477348328},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08518615365028381},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08317512273788452},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lcsys.2024.3397844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2024.3397844","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G210139373","display_name":null,"funder_award_id":"62061136004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5751649387","display_name":null,"funder_award_id":"62173226","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W2092884371","https://openalex.org/W2153024173","https://openalex.org/W2524638160","https://openalex.org/W2841721725","https://openalex.org/W2912250739","https://openalex.org/W2980133010","https://openalex.org/W3004091789","https://openalex.org/W3048348667","https://openalex.org/W3137252442","https://openalex.org/W3185320737","https://openalex.org/W3201878770","https://openalex.org/W4210600503","https://openalex.org/W4229029576","https://openalex.org/W4287509573","https://openalex.org/W4309793424","https://openalex.org/W4323323774","https://openalex.org/W4382318055","https://openalex.org/W4385768249","https://openalex.org/W6772627330","https://openalex.org/W6850700550"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W3096874164","https://openalex.org/W1985560493","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W3168977894"],"abstract_inverted_index":{"We":[0],"investigate":[1],"the":[2,32,36,57,79,89,105,109,117],"control":[3,28,44],"synthesis":[4,45],"problem":[5],"for":[6,75],"Markov":[7],"decision":[8],"processes":[9],"(MDPs)":[10],"with":[11],"unknown":[12],"transition":[13],"probabilities":[14],"under":[15],"signal":[16],"temporal":[17],"logic":[18],"(STL)":[19],"specifications.":[20],"Our":[21],"primary":[22],"objective":[23],"is":[24],"to":[25,42,60,87,102],"learn":[26],"a":[27,50,69,113],"policy":[29],"that":[30,96],"maximizes":[31],"probability":[33],"of":[34,119],"satisfying":[35],"STL":[37,43,62,76],"task.":[38],"However,":[39],"existing":[40],"approaches":[41],"using":[46],"reinforcement":[47,71],"learning":[48,72],"encounter":[49],"significant":[51],"exploration":[52,80],"challenge,":[53],"particularly":[54],"when":[55],"expanding":[56],"state":[58],"space":[59],"incorporate":[61],"tasks.":[63],"In":[64],"this":[65],"work,":[66],"we":[67,94],"propose":[68],"novel":[70],"algorithm":[73],"tailored":[74],"tasks,":[77],"addressing":[78],"difficulty":[81],"by":[82],"effectively":[83],"leveraging":[84],"counterfactual":[85],"experiences":[86,99],"expedite":[88],"training":[90],"process.":[91],"Through":[92],"experiments":[93],"show":[95],"these":[97],"generated":[98],"enable":[100],"us":[101],"fully":[103],"employ":[104],"knowledge":[106],"embedded":[107],"within":[108],"task,":[110],"resulting":[111],"in":[112,116],"substantial":[114],"reduction":[115],"number":[118],"trial-and-error":[120],"explorations":[121],"required":[122],"before":[123],"achieving":[124],"convergence.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
