{"id":"https://openalex.org/W4403792985","doi":"https://doi.org/10.24963/kr.2024/85","title":"Learning Robust Reward Machines from Noisy Labels","display_name":"Learning Robust Reward Machines from Noisy Labels","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403792985","doi":"https://doi.org/10.24963/kr.2024/85"},"language":"en","primary_location":{"id":"doi:10.24963/kr.2024/85","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2024/85","pdf_url":"https://proceedings.kr.org/2024/85/kr2024-0085-parac-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentyFirst International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://proceedings.kr.org/2024/85/kr2024-0085-parac-et-al.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107404917","display_name":"Roko Para\u0107","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Roko Para\u0107","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092490053","display_name":"Lorenzo Nodari","orcid":null},"institutions":[{"id":"https://openalex.org/I30667456","display_name":"Brescia University","ror":"https://ror.org/015ahgd08","country_code":"US","type":"education","lineage":["https://openalex.org/I30667456"]},{"id":"https://openalex.org/I79940851","display_name":"University of Brescia","ror":"https://ror.org/02q2d2610","country_code":"IT","type":"education","lineage":["https://openalex.org/I79940851"]}],"countries":["IT","US"],"is_corresponding":false,"raw_author_name":"Lorenzo Nodari","raw_affiliation_strings":["University of Brescia"],"affiliations":[{"raw_affiliation_string":"University of Brescia","institution_ids":["https://openalex.org/I30667456","https://openalex.org/I79940851"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047729153","display_name":"Leo Ardon","orcid":"https://orcid.org/0000-0003-4400-7127"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Leo Ardon","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023452296","display_name":"Daniel Furelos-Blanco","orcid":"https://orcid.org/0000-0001-7461-1910"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Daniel Furelos-Blanco","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043937484","display_name":"Federico Cerutti","orcid":"https://orcid.org/0000-0003-0755-0358"},"institutions":[{"id":"https://openalex.org/I30667456","display_name":"Brescia University","ror":"https://ror.org/015ahgd08","country_code":"US","type":"education","lineage":["https://openalex.org/I30667456"]},{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]},{"id":"https://openalex.org/I79940851","display_name":"University of Brescia","ror":"https://ror.org/02q2d2610","country_code":"IT","type":"education","lineage":["https://openalex.org/I79940851"]}],"countries":["GB","IT","US"],"is_corresponding":false,"raw_author_name":"Federico Cerutti","raw_affiliation_strings":["Cardiff University","University of Brescia"],"affiliations":[{"raw_affiliation_string":"Cardiff University","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"University of Brescia","institution_ids":["https://openalex.org/I30667456","https://openalex.org/I79940851"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046462940","display_name":"Alessandra Russo","orcid":"https://orcid.org/0000-0002-3318-8711"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alessandra Russo","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5107404917"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":0.3407,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.6750831,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"909","last_page":"919"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9038000106811523,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9038000106811523,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7224526405334473},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5762741565704346},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44109174609184265}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7224526405334473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5762741565704346},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44109174609184265}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.24963/kr.2024/85","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2024/85","pdf_url":"https://proceedings.kr.org/2024/85/kr2024-0085-parac-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentyFirst International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"},{"id":"pmh:oai:https://orca.cardiff.ac.uk:173713","is_oa":false,"landing_page_url":"https://orca.cardiff.ac.uk/view/cardiffauthors/A21880390.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4306401195","display_name":"ORCA Online Research @Cardiff (Cardiff University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79510175","host_organization_name":"Cardiff University","host_organization_lineage":["https://openalex.org/I79510175"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"doi:10.24963/kr.2024/85","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2024/85","pdf_url":"https://proceedings.kr.org/2024/85/kr2024-0085-parac-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentyFirst International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4269887845","display_name":null,"funder_award_id":"EP/S023356/1","funder_id":"https://openalex.org/F4320314731","funder_display_name":"UK Research and Innovation"},{"id":"https://openalex.org/G5259331294","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"},{"id":"https://openalex.org/G5508379222","display_name":null,"funder_award_id":"EP/X040518/1","funder_id":"https://openalex.org/F4320314731","funder_display_name":"UK Research and Innovation"},{"id":"https://openalex.org/G5726614710","display_name":null,"funder_award_id":"EP/X040518/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8444387157","display_name":"CHEDDAR: Communications Hub For Empowering Distributed ClouD Computing Applications And Research","funder_award_id":"EP/X040518/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8801017843","display_name":null,"funder_award_id":"EP/S023356/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320338295","display_name":"Army Research Laboratory","ror":"https://ror.org/011hc8f90"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4403792985.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"PROB-IRM,":[3],"an":[4,159],"approach":[5],"that":[6,35,102,130,144],"learns":[7],"robust":[8,53],"reward":[9,128],"machines":[10],"(RMs)":[11],"for":[12,78],"reinforcement":[13],"learning":[14,86,172],"(RL)":[15],"agents":[16,178,185],"from":[17,60,137,151,175],"noisy":[18,55,61,152,176],"execution":[19],"traces.":[20,139],"The":[21],"key":[22],"aspect":[23],"of":[24,30,68,118,127,171],"RM-driven":[25],"RL":[26,97,120,160],"is":[27,81,93,103],"the":[28,37,64,79,82,96,110,116,119,132,138,169,173],"exploitation":[29],"a":[31,46,90,100,124],"finite-state":[32],"ma-":[33],"chine":[34],"decomposes":[36],"agent\u2019s":[38],"task":[39],"into":[40],"different":[41],"sub-":[42],"tasks.":[43],"PROB-IRM":[44,122,145,181],"uses":[45,131],"state-of-the-art":[47],"inductive":[48],"logic":[49],"pro-":[50],"gramming":[51],"framework":[52],"to":[54,57,106,157,162,184],"examples":[56],"learn":[58,147],"RMs":[59,150],"traces":[62,153],"using":[63],"Bayesian":[65,134],"posterior":[66,133],"degree":[67],"be-":[69],"liefs,":[70],"thus":[71],"ensuring":[72],"robustness":[73],"against":[74],"inconsistencies.":[75],"Piv-":[76],"otal":[77],"results":[80],"interleaving":[83],"between":[84],"RM":[85,92,174],"and":[87,154],"policy":[88],"learning:":[89],"new":[91],"learned":[94],"whenever":[95],"agent":[98,161],"generates":[99],"trace":[101],"believed":[104],"not":[105],"be":[107],"accepted":[108],"by":[109],"current":[111],"RM.":[112],"To":[113],"speed":[114],"up":[115],"training":[117],"agent,":[121],"employs":[123],"probabilistic":[125],"formulation":[126],"shaping":[129],"beliefs":[135],"derived":[136],"Our":[140],"experimental":[141],"analysis":[142],"shows":[143],"can":[146],"(potentially":[148],"imperfect)":[149],"exploit":[155],"them":[156],"train":[158],"solve":[163],"its":[164],"tasks":[165],"success-":[166],"fully.":[167],"Despite":[168],"complexity":[170],"traces,":[177],"trained":[179],"with":[180,187],"perform":[182],"comparably":[183],"provided":[186],"handcrafted":[188],"RMs.":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
