{"id":"https://openalex.org/W4386159676","doi":"https://doi.org/10.1109/icarm58088.2023.10218831","title":"SIRL: Self-Imitation Reinforcement Learning for Single-step Hitting Tasks","display_name":"SIRL: Self-Imitation Reinforcement Learning for Single-step Hitting Tasks","publication_year":2023,"publication_date":"2023-07-08","ids":{"openalex":"https://openalex.org/W4386159676","doi":"https://doi.org/10.1109/icarm58088.2023.10218831"},"language":"en","primary_location":{"id":"doi:10.1109/icarm58088.2023.10218831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarm58088.2023.10218831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Advanced Robotics and Mechatronics (ICARM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058229499","display_name":"Yongle Luo","orcid":"https://orcid.org/0000-0001-9933-3611"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yongle Luo","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100427468","display_name":"Yuxin Wang","orcid":"https://orcid.org/0000-0002-0228-6220"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Wang","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101958555","display_name":"Kun Dong","orcid":"https://orcid.org/0000-0002-5440-185X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Dong","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104305408","display_name":"Yu Liu","orcid":"https://orcid.org/0009-0004-8537-2256"},"institutions":[{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Liu","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727179","display_name":"Zhiyong Sun","orcid":"https://orcid.org/0000-0001-9510-4897"},"institutions":[{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Sun","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381999","display_name":"Qiang Zhang","orcid":"https://orcid.org/0000-0003-3776-9799"},"institutions":[{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Zhang","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060524697","display_name":"Bo Song","orcid":"https://orcid.org/0000-0003-2307-8524"},"institutions":[{"id":"https://openalex.org/I4210099079","display_name":"Institute of Intelligent Machines","ror":"https://ror.org/00w0qep84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I2802624667","https://openalex.org/I4210099079"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Song","raw_affiliation_strings":["Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","University of Science and Technology of China, Hefei, China","Jianghuai Frontier Technology Coordination and Innovation Center, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031","institution_ids":["https://openalex.org/I4210099079"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"Jianghuai Frontier Technology Coordination and Innovation Center, Hefei, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5058229499"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I4210099079"],"apc_list":null,"apc_paid":null,"fwci":0.5237,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71322325,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"185","last_page":"190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9638000130653381,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9112852811813354},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.804079532623291},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6690394878387451},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6127793192863464},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.49940013885498047},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49413374066352844},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.45686858892440796}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9112852811813354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.804079532623291},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6690394878387451},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6127793192863464},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.49940013885498047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49413374066352844},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.45686858892440796},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icarm58088.2023.10218831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarm58088.2023.10218831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Advanced Robotics and Mechatronics (ICARM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G4733392823","display_name":null,"funder_award_id":"61973294","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2741122588","https://openalex.org/W2787938642","https://openalex.org/W2804380964","https://openalex.org/W2963864421","https://openalex.org/W2963940579","https://openalex.org/W2964120017","https://openalex.org/W2990181595","https://openalex.org/W2990747716","https://openalex.org/W3037785630","https://openalex.org/W3041202696","https://openalex.org/W3088304681","https://openalex.org/W3088811935","https://openalex.org/W3094542670","https://openalex.org/W3098951764","https://openalex.org/W3113558818","https://openalex.org/W3115706066","https://openalex.org/W3131531515","https://openalex.org/W3135286130","https://openalex.org/W3163195245","https://openalex.org/W3176329889","https://openalex.org/W3196877932","https://openalex.org/W3198020111","https://openalex.org/W3207061160","https://openalex.org/W3207311111","https://openalex.org/W3212211932","https://openalex.org/W4282006256","https://openalex.org/W4286374783","https://openalex.org/W4292110955","https://openalex.org/W4298857966","https://openalex.org/W4300799055","https://openalex.org/W4312393066","https://openalex.org/W4312695529","https://openalex.org/W6637967152","https://openalex.org/W6684921986","https://openalex.org/W6740801417","https://openalex.org/W6742461812","https://openalex.org/W6748839928","https://openalex.org/W6751540476","https://openalex.org/W6759312711","https://openalex.org/W6782766965","https://openalex.org/W6787348534","https://openalex.org/W6801074080","https://openalex.org/W6838781277","https://openalex.org/W6840321050","https://openalex.org/W6847627544"],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W2607437843","https://openalex.org/W4295700147","https://openalex.org/W2963162727","https://openalex.org/W2877093712","https://openalex.org/W2116157560","https://openalex.org/W2106714532","https://openalex.org/W4297979791","https://openalex.org/W4310614650"],"abstract_inverted_index":{"Reinforcement":[0,44],"learning":[1,83,91,129,159],"(RL)":[2],"has":[3],"demonstrated":[4],"significant":[5],"success":[6],"in":[7,20,104,131,140],"various":[8],"sequential":[9],"decision-making":[10,31],"tasks.":[11],"However,":[12],"standard":[13,124],"RL":[14,93,125],"frameworks":[15],"suffer":[16],"from":[17],"low":[18],"efficiency":[19,136],"single-step":[21,30,40],"robotic":[22],"hitting":[23,102],"tasks,":[24,41,156],"which":[25],"require":[26],"accurate":[27],"control":[28],"and":[29,108,127,137],"under":[32],"delayed":[33],"reward.":[34],"To":[35],"address":[36],"this":[37,96],"challenge":[38],"of":[39,84,92,133],"a":[42],"Self-Imitation":[43],"Learning":[45],"(SIRL)":[46],"algorithm":[47,121],"is":[48],"proposed":[49,78,119],"to":[50,88,110],"better":[51],"utilize":[52],"each":[53],"interaction":[54,75],"sample.":[55],"With":[56],"SIRL,":[57],"the":[58,73,81,90,118,123,148,161],"agent":[59,162],"can":[60,153],"obtain":[61],"optimal":[62,86],"successful":[63],"samples":[64,87],"during":[65],"itself":[66],"interactions":[67],"without":[68],"human":[69],"demonstrations,":[70],"even":[71],"if":[72],"actual":[74],"fails.":[76],"The":[77],"SIRL":[79,120,144],"uses":[80],"self-imitation":[82,158],"these":[85,155],"accelerate":[89],"policy.":[94],"In":[95],"paper,":[97],"we":[98],"create":[99],"two":[100],"challenging":[101],"tasks":[103],"MuJoCo":[105],"simulation,":[106],"Slide,":[107],"TableTennis,":[109],"evaluate":[111],"our":[112],"approach.":[113],"Experimental":[114],"results":[115],"demonstrate":[116],"that":[117,152],"outperforms":[122],"methods":[126,130],"supervised":[128],"terms":[132],"both":[134],"sample":[135],"performance.":[138],"Especially,":[139],"sparse":[141],"reward":[142],"settings,":[143],"stands":[145],"out":[146],"as":[147,157],"only":[149],"RL-based":[150],"method":[151],"learn":[154],"provides":[160],"with":[163],"more":[164],"gradient":[165],"information":[166],"for":[167],"policy":[168],"optimization.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
