{"id":"https://openalex.org/W3035454705","doi":"https://doi.org/10.1109/lra.2020.3013937","title":"Invariant Transform Experience Replay: Data Augmentation for Deep Reinforcement Learning","display_name":"Invariant Transform Experience Replay: Data Augmentation for Deep Reinforcement Learning","publication_year":2020,"publication_date":"2020-08-04","ids":{"openalex":"https://openalex.org/W3035454705","doi":"https://doi.org/10.1109/lra.2020.3013937","mag":"3035454705"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2020.3013937","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2020.3013937","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.10707","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yijiong Lin","orcid":"https://orcid.org/0000-0002-0063-0905"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijiong Lin","raw_affiliation_strings":["School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0063-0905","affiliations":[{"raw_affiliation_string":"School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiancong Huang","orcid":"https://orcid.org/0000-0002-4959-6664"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiancong Huang","raw_affiliation_strings":["School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-4959-6664","affiliations":[{"raw_affiliation_string":"School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Matthieu Zimmer","orcid":"https://orcid.org/0000-0002-8029-308X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Matthieu Zimmer","raw_affiliation_strings":["UM-SJTU Joint Institute, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-8029-308X","affiliations":[{"raw_affiliation_string":"UM-SJTU Joint Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yisheng Guan","orcid":"https://orcid.org/0000-0002-7011-0331"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yisheng Guan","raw_affiliation_strings":["School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7011-0331","affiliations":[{"raw_affiliation_string":"School of Electromechanical Engineering, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Juan Rojas","orcid":"https://orcid.org/0000-0002-6552-4572"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Juan Rojas","raw_affiliation_strings":["School of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0002-6552-4572","affiliations":[{"raw_affiliation_string":"School of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":null,"display_name":"Paul Weng","orcid":"https://orcid.org/0000-0002-2008-4569"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Paul Weng","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China","UM-SJTU Joint Institute, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-2008-4569","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"UM-SJTU Joint Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4895,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.86301786,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"5","issue":"4","first_page":"6615","last_page":"6622"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8531000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8531000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.06800000369548798,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kaleidoscope","display_name":"Kaleidoscope","score":0.8687999844551086},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7271999716758728},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6721000075340271},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.6539000272750854},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5967000126838684},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.498199999332428},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.421999990940094}],"concepts":[{"id":"https://openalex.org/C2778037017","wikidata":"https://www.wikidata.org/wiki/Q6351981","display_name":"Kaleidoscope","level":2,"score":0.8687999844551086},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7271999716758728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6983000040054321},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6976000070571899},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6721000075340271},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.6539000272750854},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5967000126838684},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.498199999332428},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.421999990940094},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34940001368522644},{"id":"https://openalex.org/C96469262","wikidata":"https://www.wikidata.org/wiki/Q1324364","display_name":"Homogeneous space","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C161759796","wikidata":"https://www.wikidata.org/wiki/Q3982902","display_name":"Teleoperation","level":3,"score":0.3172999918460846},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.29010000824928284},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C111151474","wikidata":"https://www.wikidata.org/wiki/Q1653368","display_name":"iCub","level":4,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2020.3013937","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2020.3013937","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1909.10707","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.10707","pdf_url":"https://arxiv.org/pdf/1909.10707","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.10707","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.10707","pdf_url":"https://arxiv.org/pdf/1909.10707","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1512516973","display_name":null,"funder_award_id":"61950410758","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5010798040","display_name":null,"funder_award_id":"61872238","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8167272471","display_name":null,"funder_award_id":"61750110521","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1592735339","https://openalex.org/W2069300867","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2150663697","https://openalex.org/W2257979135","https://openalex.org/W2618530766","https://openalex.org/W2795776994","https://openalex.org/W2913871909","https://openalex.org/W2971061343","https://openalex.org/W2990747716","https://openalex.org/W3035454705","https://openalex.org/W4245446383","https://openalex.org/W6616173779","https://openalex.org/W6636142636","https://openalex.org/W6680137543","https://openalex.org/W6683195989","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6726983090","https://openalex.org/W6738476251","https://openalex.org/W6740801417","https://openalex.org/W6745747580","https://openalex.org/W6748599296","https://openalex.org/W6755289019","https://openalex.org/W6776867236","https://openalex.org/W6776901495","https://openalex.org/W6780559895"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"is":[4,18],"a":[5,78,133,166,170,184],"promising":[6,185],"approach":[7],"for":[8,68],"adaptive":[9],"robot":[10],"control,":[11],"but":[12],"its":[13],"current":[14],"application":[15],"to":[16,31,59,187],"robotics":[17],"currently":[19],"hindered":[20],"by":[21],"high":[22],"sample":[23],"requirements.":[24],"To":[25],"alleviate":[26],"this":[27,72],"issue,":[28],"we":[29,76,87,131,163],"propose":[30],"exploit":[32],"the":[33,48,112,141,149],"symmetries":[34,40,98],"present":[35,88],"in":[36,124,140,148,157,191],"robotic":[37],"tasks.":[38],"Intuitively,":[39],"from":[41,115],"observed":[42,156],"trajectories":[43,53,182],"define":[44],"transformations":[45,179],"that":[46,86,177],"leave":[47],"space":[49],"of":[50,107],"feasible":[51,62],"RL":[52,181],"invariant":[54,178],"and":[55,99,127,136,144,162,196],"can":[56],"be":[57,66],"used":[58,67],"generate":[60],"new":[61],"trajectories,":[63],"which":[64,104],"could":[65],"training.":[69],"Based":[70],"on":[71,169,180],"data":[73],"augmentation":[74],"idea,":[75],"formulate":[77],"general":[79],"framework,":[80],"called":[81],"Invariant":[82],"Transform":[83],"Experience":[84,94,102],"Replay":[85,95,103],"with":[89,160],"two":[90],"techniques:":[91],"(i)":[92],"Kaleidoscope":[93],"exploits":[96],"reflectional":[97],"(ii)":[100],"Goal-augmented":[101],"takes":[105],"advantage":[106],"lax":[108],"goal":[109],"definitions.":[110],"In":[111],"Fetch":[113],"tasks":[114,146,159],"OpenAI":[116],"Gym,":[117],"our":[118],"experimental":[119],"results":[120],"show":[121],"significant":[122],"increases":[123],"learning":[125,190],"rates":[126],"success":[128],"rates.":[129],"Particularly,":[130],"attain":[132],"13,":[134],"3,":[135],"5":[137],"times":[138],"speedup":[139],"pushing,":[142],"sliding,":[143],"pick-and-place":[145],"respectively":[147],"multi-goal":[150],"setting.":[151],"Performance":[152],"gains":[153],"are":[154,183,199],"also":[155],"similar":[158],"obstacles":[161],"successfully":[164],"deployed":[165],"trained":[167],"policy":[168],"real":[171],"Baxter":[172],"robot.":[173],"Our":[174],"work":[175],"demonstrates":[176],"methodology":[186],"speed":[188],"up":[189],"deep":[192],"RL.":[193],"Code,":[194],"video,":[195],"supplementary":[197],"materials":[198],"available":[200],"at":[201],"[1].":[202]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2020-06-19T00:00:00"}
