{"id":"https://openalex.org/W4225014288","doi":"https://doi.org/10.1145/3501710.3524734","title":"Poster Abstract: Model-Free Reinforcement Learning for Symbolic Automata-encoded Objectives","display_name":"Poster Abstract: Model-Free Reinforcement Learning for Symbolic Automata-encoded Objectives","publication_year":2022,"publication_date":"2022-04-28","ids":{"openalex":"https://openalex.org/W4225014288","doi":"https://doi.org/10.1145/3501710.3524734"},"language":"en","primary_location":{"id":"doi:10.1145/3501710.3524734","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3501710.3524734","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3501710.3524734","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th ACM International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3501710.3524734","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010081957","display_name":"Anand Balakrishnan","orcid":"https://orcid.org/0000-0002-8781-4810"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I2800817003","display_name":"Southern California University for Professional Studies","ror":"https://ror.org/058zz0t50","country_code":"US","type":"education","lineage":["https://openalex.org/I2800817003"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anand Balakrishnan","raw_affiliation_strings":["University of Southern California, United States"],"affiliations":[{"raw_affiliation_string":"University of Southern California, United States","institution_ids":["https://openalex.org/I2800817003","https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043437345","display_name":"Stefan Jak\u0161i\u0107","orcid":"https://orcid.org/0000-0002-3203-9415"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Stefan Jaksic","raw_affiliation_strings":["AIT Austrian Institute of Technology GmbH, Austria"],"affiliations":[{"raw_affiliation_string":"AIT Austrian Institute of Technology GmbH, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057223922","display_name":"Edgar A. Aguilar","orcid":"https://orcid.org/0000-0002-1177-9246"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Edgar Aguilar","raw_affiliation_strings":["AIT Austrian Institute of Technology GmbH, Austria"],"affiliations":[{"raw_affiliation_string":"AIT Austrian Institute of Technology GmbH, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052399473","display_name":"Dejan Ni\u010dkovi\u0107","orcid":"https://orcid.org/0000-0001-5468-0396"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Dejan Nickovic","raw_affiliation_strings":["AIT Austrian Institute of Technology GmbH, Austria"],"affiliations":[{"raw_affiliation_string":"AIT Austrian Institute of Technology GmbH, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057473400","display_name":"Jyotirmoy V. Deshmukh","orcid":"https://orcid.org/0000-0003-4683-5540"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I2800817003","display_name":"Southern California University for Professional Studies","ror":"https://ror.org/058zz0t50","country_code":"US","type":"education","lineage":["https://openalex.org/I2800817003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jyotirmoy Deshmukh","raw_affiliation_strings":["University of Southern California, United States"],"affiliations":[{"raw_affiliation_string":"University of Southern California, United States","institution_ids":["https://openalex.org/I2800817003","https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010081957"],"corresponding_institution_ids":["https://openalex.org/I1174212","https://openalex.org/I2800817003"],"apc_list":null,"apc_paid":null,"fwci":0.2922,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56074096,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7791067361831665},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.7516148090362549},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7244870066642761},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6605851650238037},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5652122497558594},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.5601450204849243},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5534425973892212},{"id":"https://openalex.org/keywords/automata-theory","display_name":"Automata theory","score":0.5243513584136963},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.5070686340332031},{"id":"https://openalex.org/keywords/learning-automata","display_name":"Learning automata","score":0.49255046248435974},{"id":"https://openalex.org/keywords/model-checking","display_name":"Model checking","score":0.4599238336086273},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33656632900238037},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1730884313583374}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7791067361831665},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.7516148090362549},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7244870066642761},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6605851650238037},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5652122497558594},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.5601450204849243},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5534425973892212},{"id":"https://openalex.org/C116248031","wikidata":"https://www.wikidata.org/wiki/Q214526","display_name":"Automata theory","level":3,"score":0.5243513584136963},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.5070686340332031},{"id":"https://openalex.org/C2776807809","wikidata":"https://www.wikidata.org/wiki/Q6510160","display_name":"Learning automata","level":3,"score":0.49255046248435974},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.4599238336086273},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33656632900238037},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1730884313583374},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3501710.3524734","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3501710.3524734","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3501710.3524734","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th ACM International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3501710.3524734","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3501710.3524734","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3501710.3524734","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th ACM International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4225014288.pdf","grobid_xml":"https://content.openalex.org/works/W4225014288.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W2078151802","https://openalex.org/W2091565802","https://openalex.org/W2121863487","https://openalex.org/W2524638160","https://openalex.org/W2676873924","https://openalex.org/W3004091789","https://openalex.org/W3026873144"],"related_works":["https://openalex.org/W2288872327","https://openalex.org/W2746280673","https://openalex.org/W1594593035","https://openalex.org/W2489338148","https://openalex.org/W2124961391","https://openalex.org/W2152996796","https://openalex.org/W2560415449","https://openalex.org/W2613833673","https://openalex.org/W2097931012","https://openalex.org/W576785828"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3],"propose":[4],"the":[5,61,83,87,90],"use":[6,18,47],"of":[7,19,26,48,89],"symbolic":[8,20,49],"automata":[9,21,50],"as":[10,23],"formal":[11],"specifications":[12,31],"for":[13],"reinforcement":[14],"learning":[15],"agents.":[16],"The":[17],"serves":[22],"a":[24],"generalization":[25],"both":[27],"bounded-time":[28],"temporal":[29],"logic-based":[30],"and":[32],"deterministic":[33],"finite":[34],"automata,":[35],"allowing":[36],"us":[37,52,80],"to":[38,53,65,81],"describe":[39],"input":[40],"alphabets":[41],"over":[42],"metric":[43],"spaces.":[44],"Furthermore,":[45],"our":[46,74],"allows":[51,79],"define":[54],"non-sparse":[55],"potential-based":[56,75],"rewards":[57],"which":[58],"empirically":[59],"shape":[60],"reward":[62],"surface,":[63],"leading":[64],"better":[66],"convergence":[67],"during":[68],"RL.":[69],"We":[70],"also":[71],"show":[72],"that":[73,85],"rewarding":[76],"strategy":[77],"still":[78],"obtain":[82],"policy":[84],"maximizes":[86],"satisfaction":[88],"given":[91],"specification.":[92]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
