{"id":"https://openalex.org/W4407307528","doi":"https://doi.org/10.3389/frobt.2024.1444188","title":"HPRS: hierarchical potential-based reward shaping from task specifications","display_name":"HPRS: hierarchical potential-based reward shaping from task specifications","publication_year":2025,"publication_date":"2025-02-10","ids":{"openalex":"https://openalex.org/W4407307528","doi":"https://doi.org/10.3389/frobt.2024.1444188","pmid":"https://pubmed.ncbi.nlm.nih.gov/39995500"},"language":"en","primary_location":{"id":"doi:10.3389/frobt.2024.1444188","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2024.1444188","pdf_url":null,"source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3389/frobt.2024.1444188","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060851540","display_name":"Luigi Berducci","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Luigi Berducci","raw_affiliation_strings":["Cyber-Physical Systems Group, Computer Engineering, TU Wien, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cyber-Physical Systems Group, Computer Engineering, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057223922","display_name":"Edgar A. Aguilar","orcid":"https://orcid.org/0000-0002-1177-9246"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Edgar A. Aguilar","raw_affiliation_strings":["Center for Digital Safety and Security, AIT Austrian Institute of Technology GmbH, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Digital Safety and Security, AIT Austrian Institute of Technology GmbH, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052399473","display_name":"Dejan Ni\u010dkovi\u0107","orcid":"https://orcid.org/0000-0001-5468-0396"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Dejan Ni\u010dkovi\u0107","raw_affiliation_strings":["Center for Digital Safety and Security, AIT Austrian Institute of Technology GmbH, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Digital Safety and Security, AIT Austrian Institute of Technology GmbH, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034362557","display_name":"Radu Grosu","orcid":"https://orcid.org/0000-0001-5715-2142"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Radu Grosu","raw_affiliation_strings":["Cyber-Physical Systems Group, Computer Engineering, TU Wien, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cyber-Physical Systems Group, Computer Engineering, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060851540"],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":{"value":1900,"currency":"USD","value_usd":1900},"apc_paid":{"value":1900,"currency":"USD","value_usd":1900},"fwci":2.0776,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86749284,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"11","issue":null,"first_page":"1444188","last_page":"1444188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7479984760284424},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7216523885726929},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6856560111045837},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6109896898269653},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5921703577041626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5788769125938416},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5251724123954773},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5028828978538513},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4747883677482605},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4610985517501831},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4533816874027252},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4260261058807373},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3776688873767853},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3599261939525604},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.14514389634132385},{"id":"https://openalex.org/keywords/operations-management","display_name":"Operations management","score":0.08461824059486389}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7479984760284424},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7216523885726929},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6856560111045837},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6109896898269653},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5921703577041626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5788769125938416},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5251724123954773},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5028828978538513},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4747883677482605},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4610985517501831},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4533816874027252},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4260261058807373},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3776688873767853},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3599261939525604},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.14514389634132385},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.08461824059486389},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3389/frobt.2024.1444188","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2024.1444188","pdf_url":null,"source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},{"id":"pmid:39995500","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39995500","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in robotics and AI","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11848067","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11848067","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Front Robot AI","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:a8fdabe093aa4579aad3391aeb3c7179","is_oa":true,"landing_page_url":"https://doaj.org/article/a8fdabe093aa4579aad3391aeb3c7179","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Frontiers in Robotics and AI, Vol 11 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3389/frobt.2024.1444188","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2024.1444188","pdf_url":null,"source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W1547304883","https://openalex.org/W1769460679","https://openalex.org/W1777239053","https://openalex.org/W1987725948","https://openalex.org/W1998649829","https://openalex.org/W2002305926","https://openalex.org/W2060846151","https://openalex.org/W2084261982","https://openalex.org/W2102660061","https://openalex.org/W2117428849","https://openalex.org/W2122410182","https://openalex.org/W2130750514","https://openalex.org/W2141481921","https://openalex.org/W2145339207","https://openalex.org/W2236968622","https://openalex.org/W2342662072","https://openalex.org/W2411093439","https://openalex.org/W2462906003","https://openalex.org/W2567705466","https://openalex.org/W2605102758","https://openalex.org/W2766447205","https://openalex.org/W2786261640","https://openalex.org/W2804948070","https://openalex.org/W2808386811","https://openalex.org/W2963190533","https://openalex.org/W2963778636","https://openalex.org/W2963864421","https://openalex.org/W2968120782","https://openalex.org/W2972486407","https://openalex.org/W2990138404","https://openalex.org/W2999905431","https://openalex.org/W3004091789","https://openalex.org/W3096990961","https://openalex.org/W3119264617","https://openalex.org/W3141989474","https://openalex.org/W3145799606","https://openalex.org/W3176233416","https://openalex.org/W3202635380","https://openalex.org/W3211390666","https://openalex.org/W3211811583","https://openalex.org/W3213774989","https://openalex.org/W4214717370","https://openalex.org/W4249441547","https://openalex.org/W4285102517","https://openalex.org/W4287113179","https://openalex.org/W4287266177","https://openalex.org/W4300773931","https://openalex.org/W4307078087","https://openalex.org/W4321392130","https://openalex.org/W4367016688","https://openalex.org/W4383108582","https://openalex.org/W6637770816","https://openalex.org/W6638088447","https://openalex.org/W6649830115","https://openalex.org/W6682455769","https://openalex.org/W6684921986","https://openalex.org/W6691872600","https://openalex.org/W6704571135","https://openalex.org/W6714262829","https://openalex.org/W6718836005","https://openalex.org/W6731702996","https://openalex.org/W6739585900","https://openalex.org/W6740881549","https://openalex.org/W6752298494","https://openalex.org/W6754554871","https://openalex.org/W6759788895","https://openalex.org/W6759943591","https://openalex.org/W6766842884","https://openalex.org/W6773395564","https://openalex.org/W6788460005","https://openalex.org/W6797705063","https://openalex.org/W6803713571","https://openalex.org/W6803914020","https://openalex.org/W6844321534","https://openalex.org/W6847031240","https://openalex.org/W6950609307","https://openalex.org/W7047190309"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"The":[0],"automatic":[1],"synthesis":[2],"of":[3,37,84,117,130,141,198,239,280,300],"policies":[4,188,223],"for":[5],"robotics":[6,179,284],"systems":[7],"through":[8],"reinforcement":[9],"learning":[10],"relies":[11],"upon,":[12],"and":[13,65,87,90,120,134,144,157,205,227,260,262],"is":[14,60,127,138,184],"intimately":[15],"guided":[16],"by,":[17],"a":[18,35,57,77,80,97,115,128,139,148,297],"reward":[19,46,104,126,133,137,172],"signal.":[20,105],"Consequently,":[21],"this":[22,73,108,160],"signal":[23,58],"should":[24],"faithfully":[25],"reflect":[26],"the":[27,103,112,124,131,135,142,196,199,211,237,258,269,277,304],"designer's":[28],"intentions,":[29],"which":[30,59],"are":[31],"often":[32],"expressed":[33],"as":[34,79],"collection":[36],"high-level":[38],"requirements.":[39,71,192,273],"Several":[40],"works":[41],"have":[42],"been":[43],"developing":[44],"automated":[45,93],"definitions":[47],"from":[48,252],"formal":[49],"requirements,":[50,219],"but":[51],"they":[52],"show":[53,248,295],"limitations":[54],"in":[55,63,266,282],"producing":[56],"both":[61],"effective":[62],"training":[64],"able":[66,185],"to":[67,95,154,161,186,210],"fulfill":[68],"multiple":[69],"heterogeneous":[70],"In":[72],"paper,":[74],"we":[75,151,235,275],"define":[76],"task":[78,301],"partially":[81],"ordered":[82],"set":[83],"safety,":[85,118],"target,":[86,119],"comfort":[88,121,136,226,253],"requirements":[89,101,113,254],"introduce":[91],"an":[92],"methodology":[94],"enforce":[96],"natural":[98],"order":[99],"among":[100],"into":[102,114],"We":[106,165],"perform":[107],"by":[109],"automatically":[110,216],"translating":[111],"sum":[116],"rewards,":[122],"where":[123],"target":[125,145,259,272],"function":[129,140],"safety":[132,143,261,270],"rewards.":[146],"Using":[147],"potential-based":[149,171],"formulation,":[150],"enhance":[152],"sparse":[153],"dense":[155],"rewards":[156],"formally":[158],"prove":[159],"maintain":[162],"policy":[163],"optimality.":[164],"call":[166],"our":[167],"novel":[168],"approach":[169],"hierarchical,":[170],"shaping":[173],"(HPRS).":[174],"Our":[175,246],"experiments":[176,247,289,294],"on":[177,243],"eight":[178],"benchmarks":[180],"demonstrate":[181],"that":[182,249,296],"HPRS":[183,201,220,250,281],"generate":[187],"satisfying":[189],"complex":[190],"hierarchical":[191,298],"Moreover,":[193],"compared":[194],"with":[195,208,224,257,268],"state":[197],"art,":[200],"achieves":[202],"faster":[203],"convergence":[204],"superior":[206],"performance":[207],"respect":[209],"rank-preserving":[212],"policy-assessment":[213],"metric.":[214],"By":[215],"balancing":[217],"competing":[218],"produces":[221],"task-satisfying":[222],"improved":[225],"without":[228,307],"manual":[229],"parameter":[230],"tuning.":[231],"Through":[232],"ablation":[233],"studies,":[234],"analyze":[236],"impact":[238],"individual":[240],"requirement":[241],"classes":[242],"emergent":[244],"behavior.":[245],"benefits":[251],"when":[255,265],"aligned":[256],"ignores":[263],"them":[264],"conflict":[267],"or":[271],"Finally,":[274],"validate":[276],"practical":[278],"usability":[279],"real-world":[283],"applications,":[285],"including":[286],"two":[287],"sim-to-real":[288,305],"using":[290],"F1TENTH":[291],"vehicles.":[292],"These":[293],"design":[299],"specifications":[302],"facilitates":[303],"transfer":[306],"any":[308],"domain":[309],"adaptation.":[310]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
