{"id":"https://openalex.org/W4304142069","doi":"https://doi.org/10.1109/icac55051.2022.9911100","title":"Abstract Demonstrations and Adaptive Exploration for Efficient and Stable Multi-step Sparse Reward Reinforcement Learning","display_name":"Abstract Demonstrations and Adaptive Exploration for Efficient and Stable Multi-step Sparse Reward Reinforcement Learning","publication_year":2022,"publication_date":"2022-09-01","ids":{"openalex":"https://openalex.org/W4304142069","doi":"https://doi.org/10.1109/icac55051.2022.9911100"},"language":"en","primary_location":{"id":"doi:10.1109/icac55051.2022.9911100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icac55051.2022.9911100","pdf_url":null,"source":{"id":"https://openalex.org/S4363608428","display_name":"2022 27th International Conference on Automation and Computing (ICAC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 27th International Conference on Automation and Computing (ICAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063751533","display_name":"Xintong Yang","orcid":"https://orcid.org/0000-0002-7612-614X"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Xintong Yang","raw_affiliation_strings":["Cardiff University,School of Engineering,Cardiff,UK","School of Engineering, Cardiff University, Cardiff, UK"],"affiliations":[{"raw_affiliation_string":"Cardiff University,School of Engineering,Cardiff,UK","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"School of Engineering, Cardiff University, Cardiff, UK","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068175770","display_name":"Ze Ji","orcid":"https://orcid.org/0000-0002-8968-9902"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ze Ji","raw_affiliation_strings":["Cardiff University,School of Engineering,Cardiff,UK","School of Engineering, Cardiff University, Cardiff, UK"],"affiliations":[{"raw_affiliation_string":"Cardiff University,School of Engineering,Cardiff,UK","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"School of Engineering, Cardiff University, Cardiff, UK","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013863229","display_name":"Jing Wu","orcid":"https://orcid.org/0000-0001-5123-9861"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jing Wu","raw_affiliation_strings":["Cardiff University,School of Computer Science and Informatics,Cardiff,UK","School of Computer Science and Informatics, Cardiff University, Cardiff, UK"],"affiliations":[{"raw_affiliation_string":"Cardiff University,School of Computer Science and Informatics,Cardiff,UK","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, UK","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067850699","display_name":"Yu\u2010Kun Lai","orcid":"https://orcid.org/0000-0002-2094-5680"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yu-Kun Lai","raw_affiliation_strings":["Cardiff University,School of Computer Science and Informatics,Cardiff,UK","School of Computer Science and Informatics, Cardiff University, Cardiff, UK"],"affiliations":[{"raw_affiliation_string":"Cardiff University,School of Computer Science and Informatics,Cardiff,UK","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, UK","institution_ids":["https://openalex.org/I79510175"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063751533"],"corresponding_institution_ids":["https://openalex.org/I79510175"],"apc_list":null,"apc_paid":null,"fwci":0.3118,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.5123357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"518","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8652873039245605},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7404015064239502},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6889758706092834},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6785387396812439},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.47770121693611145},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.464138925075531},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39551693201065063},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3149280548095703},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08921489119529724},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07211586833000183}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8652873039245605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7404015064239502},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6889758706092834},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6785387396812439},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.47770121693611145},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.464138925075531},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39551693201065063},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3149280548095703},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08921489119529724},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07211586833000183},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icac55051.2022.9911100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icac55051.2022.9911100","pdf_url":null,"source":{"id":"https://openalex.org/S4363608428","display_name":"2022 27th International Conference on Automation and Computing (ICAC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 27th International Conference on Automation and Computing (ICAC)","raw_type":"proceedings-article"},{"id":"pmh:oai:https://orca.cardiff.ac.uk:151519","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401195","display_name":"ORCA Online Research @Cardiff (Cardiff University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79510175","host_organization_name":"Cardiff University","host_organization_lineage":["https://openalex.org/I79510175"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1726275262","https://openalex.org/W2109910161","https://openalex.org/W2119709400","https://openalex.org/W2121863487","https://openalex.org/W2144033270","https://openalex.org/W2145339207","https://openalex.org/W2337392266","https://openalex.org/W2468062401","https://openalex.org/W2604382266","https://openalex.org/W2793380548","https://openalex.org/W2904246096","https://openalex.org/W2963099939","https://openalex.org/W2963864421","https://openalex.org/W2964311356","https://openalex.org/W2994446013","https://openalex.org/W3034314203","https://openalex.org/W3040707741","https://openalex.org/W3093426589","https://openalex.org/W3117423974","https://openalex.org/W3134939669","https://openalex.org/W3212211932","https://openalex.org/W4287864040","https://openalex.org/W4297669644","https://openalex.org/W4300799055","https://openalex.org/W6637620875","https://openalex.org/W6677916085","https://openalex.org/W6684921986","https://openalex.org/W6730038592","https://openalex.org/W6740801417","https://openalex.org/W6745405225","https://openalex.org/W6749986616","https://openalex.org/W6757592117","https://openalex.org/W6773937831","https://openalex.org/W6779195239","https://openalex.org/W6790911039"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2110944602","https://openalex.org/W3213722473"],"abstract_inverted_index":{"Although":[0],"Deep":[1],"Reinforcement":[2],"Learning":[3],"(DRL)":[4],"has":[5],"been":[6],"popular":[7,126],"in":[8,139],"many":[9],"disciplines":[10],"including":[11],"robotics,":[12],"state-of-the-art":[13],"DRL":[14,48,127],"algorithms":[15,128],"still":[16],"struggle":[17],"to":[18,83,133],"learn":[19,134],"long-horizon,":[20],"multistep":[21],"and":[22,63,75,99,106,115,131,137],"sparse":[23],"reward":[24,35],"tasks,":[25,43],"such":[26,42],"as":[27],"stacking":[28],"several":[29,112],"blocks":[30],"given":[31],"only":[32],"a":[33,47,70],"task-completion":[34],"signal.":[36],"To":[37],"improve":[38],"learning":[39],"efficiency":[40],"for":[41,96,102],"this":[44],"paper":[45],"proposes":[46],"exploration":[49],"technique,":[50],"termed":[51],"$\\mathbf{A^{2}}$,":[52],"which":[53],"integrates":[54],"two":[55],"components":[56],"inspired":[57],"by":[58,68],"human":[59],"experiences:":[60],"Abstract":[61],"demonstrations":[62],"Adaptive":[64],"exploration.":[65],"$\\mathbf{A^{2}}$":[66,123],"starts":[67],"decomposing":[69],"complex":[71],"task":[72],"into":[73],"subtasks,":[74],"then":[76],"provides":[77],"the":[78,87,90],"correct":[79],"orders":[80],"of":[81],"subtasks":[82,98],"learn.":[84],"During":[85],"training,":[86],"agent":[88],"explores":[89],"environment":[91],"adaptively,":[92],"acting":[93],"more":[94,100,135],"deterministically":[95],"well-mastered":[97],"stochastically":[101],"ill-learnt":[103],"subtasks.":[104],"Ablation":[105],"comparative":[107],"experiments":[108],"are":[109],"conducted":[110],"on":[111],"grid-world":[113],"tasks":[114],"three":[116],"robotic":[117],"manipulation":[118],"tasks.":[119],"We":[120],"demonstrate":[121],"that":[122],"can":[124],"aid":[125],"(DQN,":[129],"DDPG,":[130],"SAC)":[132],"efficiently":[136],"stably":[138],"these":[140],"environments.":[141]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-10-11T00:00:00"}
