{"id":"https://openalex.org/W4401413949","doi":"https://doi.org/10.1109/icra57147.2024.10610391","title":"Barrier Functions Inspired Reward Shaping for Reinforcement Learning","display_name":"Barrier Functions Inspired Reward Shaping for Reinforcement Learning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401413949","doi":"https://doi.org/10.1109/icra57147.2024.10610391"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610391","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610391","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094077012","display_name":"Nilaksh Nilaksh","orcid":null},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Nilaksh","raw_affiliation_strings":["Indian Institute of Technology (IIT),Kharagpur"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (IIT),Kharagpur","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067605878","display_name":"Abhishek Ranjan","orcid":"https://orcid.org/0000-0003-3289-6705"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Abhishek Ranjan","raw_affiliation_strings":["Indian Institute of Science (IISc),Bangalore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science (IISc),Bangalore","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009177175","display_name":"Shreenabh Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shreenabh Agrawal","raw_affiliation_strings":["Indian Institute of Science (IISc),Bangalore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science (IISc),Bangalore","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100585764","display_name":"Aayush Jain","orcid":null},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aayush Jain","raw_affiliation_strings":["Indian Institute of Technology (IIT),Kharagpur"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (IIT),Kharagpur","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073867567","display_name":"Pushpak Jagtap","orcid":"https://orcid.org/0000-0002-5452-8850"},"institutions":[{"id":"https://openalex.org/I4210110745","display_name":"Interaction Institute for Social Change","ror":"https://ror.org/01qyb4t36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210110745"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pushpak Jagtap","raw_affiliation_strings":["RBCCPS, IISc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RBCCPS, IISc","institution_ids":["https://openalex.org/I4210110745"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084523677","display_name":"Shishir Kolathaya","orcid":"https://orcid.org/0000-0001-8689-2318"},"institutions":[{"id":"https://openalex.org/I4210110745","display_name":"Interaction Institute for Social Change","ror":"https://ror.org/01qyb4t36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210110745"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shishir Kolathaya","raw_affiliation_strings":["CSA &#x0026; RBCCPS, IISc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CSA &#x0026; RBCCPS, IISc","institution_ids":["https://openalex.org/I4210110745"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10807","last_page":"10813"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8072268962860107},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6355211734771729},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5051751732826233},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4150979518890381},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.21095609664916992},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.11845692992210388}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8072268962860107},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6355211734771729},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5051751732826233},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4150979518890381},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.21095609664916992},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.11845692992210388}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610391","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610391","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1574246016","https://openalex.org/W1592847719","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W2087617385","https://openalex.org/W2091565802","https://openalex.org/W2111316871","https://openalex.org/W2145339207","https://openalex.org/W2146396918","https://openalex.org/W2158969944","https://openalex.org/W2539402368","https://openalex.org/W2620974420","https://openalex.org/W2736601468","https://openalex.org/W2788455270","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W3004091789","https://openalex.org/W3005355833","https://openalex.org/W3108060258","https://openalex.org/W3213974477","https://openalex.org/W4226278401","https://openalex.org/W4237591687","https://openalex.org/W4280636619","https://openalex.org/W4298181573","https://openalex.org/W4315706776","https://openalex.org/W4321392130","https://openalex.org/W4389610038","https://openalex.org/W6627932998","https://openalex.org/W6638088447","https://openalex.org/W6682205418","https://openalex.org/W6739055881","https://openalex.org/W6739585900","https://openalex.org/W6741002519","https://openalex.org/W6748839928","https://openalex.org/W6800004206","https://openalex.org/W6810738896","https://openalex.org/W6838566501","https://openalex.org/W6843975101","https://openalex.org/W6847063038","https://openalex.org/W6848588639"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"has":[3],"progressed":[4],"from":[5],"simple":[6],"control":[7,133],"tasks":[8],"to":[9,105,118],"complex":[10],"real-world":[11,90],"challenges":[12],"with":[13,89,126,139],"large":[14],"state":[15],"spaces.":[16],"While":[17],"RL":[18],"excels":[19],"in":[20],"these":[21],"tasks,":[22],"training":[23],"time":[24],"remains":[25],"a":[26,31,49,123],"limitation.":[27],"Reward":[28],"shaping":[29],"is":[30],"popular":[32],"solution,":[33],"but":[34],"existing":[35],"methods":[36],"often":[37],"rely":[38],"on":[39,82,92],"value":[40],"functions,":[41,57],"which":[42],"face":[43],"scalability":[44],"issues.":[45],"This":[46],"paper":[47],"presents":[48],"novel":[50],"safety-oriented":[51],"reward-shaping":[52],"framework":[53],"inspired":[54],"by":[55],"barrier":[56],"offering":[58],"simplicity":[59],"and":[60,67,85,110,134],"ease":[61],"of":[62,73,136],"implementation":[63],"across":[64],"various":[65],"environments":[66],"tasks.":[68],"To":[69],"evaluate":[70],"the":[71,74,93,119,127,137],"effectiveness":[72],"proposed":[75],"reward":[76,141],"formulations,":[77],"we":[78,130],"conduct":[79],"simulation":[80],"experiments":[81],"CartPole,":[83],"Ant,":[84],"Humanoid":[86],"environments,":[87],"along":[88],"deployment":[91],"Unitree":[94],"Go1":[95,128],"quadruped":[96],"robot.":[97],"Our":[98],"results":[99],"demonstrate":[100],"that":[101],"our":[102,140,146],"method":[103],"leads":[104],"1.4-2.8":[106],"times":[107],"faster":[108],"convergence":[109],"as":[111,113],"low":[112],"50-60%":[114],"actuation":[115],"effort":[116],"compared":[117],"vanilla":[120],"reward.":[121],"In":[122],"sim-to-real":[124],"experiment":[125],"robot,":[129],"demonstrated":[131],"better":[132],"dynamics":[135],"bot":[138],"framework.":[142],"We":[143],"have":[144],"open-sourced":[145],"code":[147],"at":[148],"https://github.com/Safe-RL-IISc/barrier_shaping.":[149]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2024-08-09T00:00:00"}