{"id":"https://openalex.org/W4391171937","doi":"https://doi.org/10.1017/s0263574724000092","title":"One-shot sim-to-real transfer policy for robotic assembly via reinforcement learning with visual demonstration","display_name":"One-shot sim-to-real transfer policy for robotic assembly via reinforcement learning with visual demonstration","publication_year":2024,"publication_date":"2024-01-24","ids":{"openalex":"https://openalex.org/W4391171937","doi":"https://doi.org/10.1017/s0263574724000092"},"language":"en","primary_location":{"id":"doi:10.1017/s0263574724000092","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0263574724000092","pdf_url":null,"source":{"id":"https://openalex.org/S92163612","display_name":"Robotica","issn_l":"0263-5747","issn":["0263-5747","1469-8668"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotica","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025435742","display_name":"R. Xiao","orcid":"https://orcid.org/0000-0003-3855-0588"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruihong Xiao","raw_affiliation_strings":["School of Automation Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019906827","display_name":"Chenguang Yang","orcid":"https://orcid.org/0000-0001-5255-5559"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenguang Yang","raw_affiliation_strings":["School of Automation Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067854210","display_name":"Yiming Jiang","orcid":"https://orcid.org/0000-0001-5963-2932"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Jiang","raw_affiliation_strings":["The National Engineering Research Center for Robot Visual Perception and Control, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"The National Engineering Research Center for Robot Visual Perception and Control, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100685577","display_name":"Hui Zhang","orcid":"https://orcid.org/0000-0002-1370-1848"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Zhang","raw_affiliation_strings":["The National Engineering Research Center for Robot Visual Perception and Control, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"The National Engineering Research Center for Robot Visual Perception and Control, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5019906827"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":3.117,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91227418,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":"42","issue":"4","first_page":"1074","last_page":"1093"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11023","display_name":"Prosthetics and Rehabilitation Robotics","score":0.9383999705314636,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6534401178359985},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5800435543060303},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5494279265403748},{"id":"https://openalex.org/keywords/one-shot","display_name":"One shot","score":0.5202156901359558},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.5172553658485413},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.450887531042099},{"id":"https://openalex.org/keywords/single-shot","display_name":"Single shot","score":0.44469407200813293},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.37794554233551025},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2524241805076599},{"id":"https://openalex.org/keywords/mechanical-engineering","display_name":"Mechanical engineering","score":0.15889352560043335},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1244029700756073},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.088955819606781},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.0860319435596466},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.056189268827438354}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6534401178359985},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5800435543060303},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5494279265403748},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.5202156901359558},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.5172553658485413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.450887531042099},{"id":"https://openalex.org/C3019835501","wikidata":"https://www.wikidata.org/wiki/Q1310130","display_name":"Single shot","level":2,"score":0.44469407200813293},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37794554233551025},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2524241805076599},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.15889352560043335},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1244029700756073},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.088955819606781},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0860319435596466},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.056189268827438354},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0263574724000092","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0263574724000092","pdf_url":null,"source":{"id":"https://openalex.org/S92163612","display_name":"Robotica","issn_l":"0263-5747","issn":["0263-5747","1469-8668"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotica","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W130216483","https://openalex.org/W1969483458","https://openalex.org/W1991681756","https://openalex.org/W2042882799","https://openalex.org/W2049981393","https://openalex.org/W2098764590","https://openalex.org/W2130726249","https://openalex.org/W2141664020","https://openalex.org/W2158782408","https://openalex.org/W2160821342","https://openalex.org/W2173248099","https://openalex.org/W2605102758","https://openalex.org/W2612690371","https://openalex.org/W2736601468","https://openalex.org/W2741122588","https://openalex.org/W2766555673","https://openalex.org/W2787938642","https://openalex.org/W2788862220","https://openalex.org/W2904246096","https://openalex.org/W2910208901","https://openalex.org/W2963150697","https://openalex.org/W2963411833","https://openalex.org/W2963940579","https://openalex.org/W2978706708","https://openalex.org/W2981378444","https://openalex.org/W2989940186","https://openalex.org/W2990138404","https://openalex.org/W3000439273","https://openalex.org/W3034986117","https://openalex.org/W3088310808","https://openalex.org/W3090612618","https://openalex.org/W3120778962","https://openalex.org/W3122270880","https://openalex.org/W3137754963","https://openalex.org/W3150631816","https://openalex.org/W3216772467","https://openalex.org/W4220982128","https://openalex.org/W4221167977","https://openalex.org/W4236251699","https://openalex.org/W4285182079","https://openalex.org/W4294936113","https://openalex.org/W4309923758","https://openalex.org/W4313703173","https://openalex.org/W4313703382","https://openalex.org/W4315490076","https://openalex.org/W4327729182","https://openalex.org/W4376891105","https://openalex.org/W4379520522","https://openalex.org/W4383561639","https://openalex.org/W6662860747","https://openalex.org/W6789575692","https://openalex.org/W6804601995"],"related_works":["https://openalex.org/W2497720472","https://openalex.org/W4292659306","https://openalex.org/W3044321615","https://openalex.org/W4294892107","https://openalex.org/W2955491601","https://openalex.org/W146529714","https://openalex.org/W3142396426","https://openalex.org/W2471333042","https://openalex.org/W2316500695","https://openalex.org/W2094813019"],"abstract_inverted_index":{"Abstract":[0],"Reinforcement":[1],"learning":[2,66,78,94],"(RL)":[3],"has":[4],"been":[5],"successfully":[6],"applied":[7],"to":[8,24,67,80,101,142,148,189],"a":[9,122,139,182,187],"wealth":[10],"of":[11,54,105,112,171,218],"robot":[12,57,188],"manipulation":[13],"tasks":[14,192],"and":[15,27,38,43,108,130,159,173,201,223],"continuous":[16],"control":[17],"problems.":[18],"However,":[19],"it":[20],"is":[21,99,119],"still":[22],"limited":[23,195],"industrial":[25],"applications":[26],"suffers":[28],"from":[29],"three":[30],"major":[31],"challenges:":[32],"sample":[33],"inefficiency,":[34],"real":[35,61],"data":[36],"collection,":[37],"the":[39,51,60,69,88,103,110,113,144,153,156,161,165],"gap":[40],"between":[41],"simulator":[42,158],"reality.":[44,149],"In":[45],"this":[46],"paper,":[47],"we":[48],"focus":[49],"on":[50,164],"practical":[52],"application":[53],"RL":[55],"for":[56,134],"assembly":[58,117,177,191],"in":[59,84,155,216],"world.":[62],"We":[63,91,136,150],"apply":[64],"enlightenment":[65,93],"improve":[68,109],"proximal":[70],"policy":[71,106],"optimization,":[72],"an":[73,82],"on-policy":[74],"model-free":[75],"actor-critic":[76],"reinforcement":[77],"algorithm,":[79],"train":[81],"agent":[83],"Cartesian":[85],"space":[86],"using":[87,194],"proprioceptive":[89],"information.":[90],"introduce":[92],"incorporated":[95],"via":[96],"pretraining,":[97],"which":[98],"beneficial":[100],"reduce":[102],"cost":[104],"training":[107,219],"effectiveness":[111],"policy.":[114],"A":[115],"human-like":[116],"trajectory":[118],"generated":[120],"through":[121],"two-step":[123],"method":[124,163],"with":[125],"segmenting":[126],"objects":[127],"by":[128,198],"locations":[129],"iterative":[131],"closest":[132],"point":[133],"pretraining.":[135],"also":[137],"design":[138],"sim-to-real":[140],"controller":[141],"correct":[143],"error":[145],"while":[146],"transferring":[147],"set":[151],"up":[152],"environment":[154],"MuJoCo":[157],"demonstrated":[160],"proposed":[162],"recently":[166],"established":[167],"The":[168,179,204],"National":[169],"Institute":[170],"Standards":[172],"Technology":[174],"(NIST)":[175],"gear":[176],"benchmark.":[178],"paper":[180],"introduces":[181],"unique":[183],"framework":[184],"that":[185,209],"enables":[186],"learn":[190],"efficiently":[193],"real-world":[196],"samples":[197],"leveraging":[199],"simulations":[200],"visual":[202],"demonstrations.":[203],"comparative":[205],"experiment":[206],"results":[207],"indicate":[208],"our":[210],"approach":[211],"surpasses":[212],"other":[213],"baseline":[214],"methods":[215],"terms":[217],"speed,":[220],"success":[221],"rate,":[222],"efficiency.":[224]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
