{"id":"https://openalex.org/W3133533407","doi":"https://doi.org/10.1109/iros51168.2021.9636020","title":"Self-Supervised Online Reward Shaping in Sparse-Reward Environments","display_name":"Self-Supervised Online Reward Shaping in Sparse-Reward Environments","publication_year":2021,"publication_date":"2021-09-27","ids":{"openalex":"https://openalex.org/W3133533407","doi":"https://doi.org/10.1109/iros51168.2021.9636020","mag":"3133533407"},"language":"en","primary_location":{"id":"doi:10.1109/iros51168.2021.9636020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636020","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065695538","display_name":"Farzan Memarian","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Farzan Memarian","raw_affiliation_strings":["Oden Institute for Computational Engineering and Sciences, University of Texas,Austin,TX,USA","Oden Institute for Computational Engineering and Sciences, University of Texas, Austin, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oden Institute for Computational Engineering and Sciences, University of Texas,Austin,TX,USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Oden Institute for Computational Engineering and Sciences, University of Texas, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028642056","display_name":"Wonjoon Goo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wonjoon Goo","raw_affiliation_strings":["Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033283742","display_name":"Rudolf Lioutikov","orcid":"https://orcid.org/0000-0002-8924-7514"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]},{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Rudolf Lioutikov","raw_affiliation_strings":["Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA","Intuitive Robots Lab, Karlsruhe Institut of Technology, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA","institution_ids":[]},{"raw_affiliation_string":"Intuitive Robots Lab, Karlsruhe Institut of Technology, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas,Austin,TX,USA","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Personal Autonomous Robotics Lab (PeARL), The University of Texas, Austin, TX, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047471973","display_name":"Ufuk Topcu","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ufuk Topcu","raw_affiliation_strings":["University of Texas,Department of Aerospace Engineering and Engineering Mechanics,Austin,TX,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Texas,Department of Aerospace Engineering and Engineering Mechanics,Austin,TX,USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.8049,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.94676525,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2369","last_page":"2375"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7306004762649536},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6502293348312378},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5934228897094727},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5896499156951904},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5271722078323364},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5200424194335938},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5075058341026306}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7306004762649536},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6502293348312378},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5934228897094727},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5896499156951904},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5271722078323364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5200424194335938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5075058341026306},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros51168.2021.9636020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636020","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310620","display_name":"University of Texas at Austin","ror":"https://ror.org/00hj54h04"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W88199814","https://openalex.org/W1597864774","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W1999874108","https://openalex.org/W2050985708","https://openalex.org/W2061562262","https://openalex.org/W2121863487","https://openalex.org/W2130750514","https://openalex.org/W2145339207","https://openalex.org/W2151382427","https://openalex.org/W2155007355","https://openalex.org/W2155968351","https://openalex.org/W2158782408","https://openalex.org/W2164419340","https://openalex.org/W2397581010","https://openalex.org/W2575705757","https://openalex.org/W2746553466","https://openalex.org/W2763110165","https://openalex.org/W2781726626","https://openalex.org/W2788781499","https://openalex.org/W2804380964","https://openalex.org/W2902982219","https://openalex.org/W2913350117","https://openalex.org/W2962937519","https://openalex.org/W2962943921","https://openalex.org/W2963308241","https://openalex.org/W2963523627","https://openalex.org/W2963646405","https://openalex.org/W2964043796","https://openalex.org/W2964062135","https://openalex.org/W2964118020","https://openalex.org/W2964161785","https://openalex.org/W2964263543","https://openalex.org/W2980192220","https://openalex.org/W2984524734","https://openalex.org/W3000757100","https://openalex.org/W3008082783","https://openalex.org/W3015812362","https://openalex.org/W3034232789","https://openalex.org/W3034823751","https://openalex.org/W3120789931","https://openalex.org/W3195923140","https://openalex.org/W4288029271","https://openalex.org/W4288284400","https://openalex.org/W4289288058","https://openalex.org/W4321392130","https://openalex.org/W6638088447","https://openalex.org/W6682205418","https://openalex.org/W6682849425","https://openalex.org/W6684159546","https://openalex.org/W6692846177","https://openalex.org/W6712392314","https://openalex.org/W6739585900","https://openalex.org/W6747473740","https://openalex.org/W6748523217","https://openalex.org/W6751540476","https://openalex.org/W6753960536","https://openalex.org/W6759301632","https://openalex.org/W6761908843","https://openalex.org/W6767226293","https://openalex.org/W6769174716","https://openalex.org/W6773844738","https://openalex.org/W6774365965","https://openalex.org/W7005110637"],"related_works":["https://openalex.org/W2188500270","https://openalex.org/W2303858293","https://openalex.org/W2915512527","https://openalex.org/W51364034","https://openalex.org/W2055243143","https://openalex.org/W2793336762","https://openalex.org/W2091548507","https://openalex.org/W2368816706","https://openalex.org/W3159414774","https://openalex.org/W4385728102"],"abstract_inverted_index":{"We":[0,68],"introduce":[1,69],"Self-supervised":[2],"Online":[3],"Reward":[4],"Shaping":[5],"(SORS),":[6],"which":[7],"aims":[8],"to":[9,137],"improve":[10],"the":[11,51,55,61,80,86,90,110],"sample":[12,118,134],"efficiency":[13,135],"of":[14,79,89],"any":[15],"RL":[16,123],"algorithm":[17,112],"in":[18],"sparse-reward":[19,103],"environments":[20,104],"by":[21,47],"automatically":[22],"densifying":[23],"rewards.":[24],"The":[25],"proposed":[26,111],"framework":[27],"alternates":[28],"between":[29],"classification-based":[30],"reward":[31,39,45,66,81,141],"inference":[32,46],"and":[33],"policy":[34,56,88],"update":[35,57],"steps\u2014the":[36],"original":[37,91],"sparse":[38,126],"provides":[40],"a":[41,121],"self-supervisory":[42],"signal":[43],"for":[44],"ranking":[48],"trajectories":[49],"that":[50,71],"agent":[52],"observes,":[53],"while":[54,93],"is":[58,113],"performed":[59],"with":[60],"newly":[62],"inferred,":[63],"typically":[64],"dense":[65,140],"function.":[67],"theory":[70],"shows":[72],"that,":[73,106],"under":[74],"certain":[75],"conditions,":[76],"this":[77],"alteration":[78],"function":[82],"will":[83],"not":[84,114],"change":[85],"optimal":[87],"MDP,":[92],"potentially":[94],"increasing":[95],"learning":[96],"speed":[97],"significantly.":[98],"Experimental":[99],"results":[100],"on":[101],"several":[102],"demonstrate":[105],"across":[107],"multiple":[108],"domains,":[109],"only":[115],"significantly":[116],"more":[117],"efficient":[119],"than":[120],"standard":[122],"baseline":[124],"using":[125],"rewards,":[127],"but,":[128],"at":[129],"times,":[130],"also":[131],"achieves":[132],"similar":[133],"compared":[136],"when":[138],"hand-designed":[139],"functions":[142],"are":[143],"used.":[144]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
