{"id":"https://openalex.org/W4200534645","doi":"https://doi.org/10.1109/iros51168.2021.9636061","title":"Shaping Progressive Net of Reinforcement Learning for Policy Transfer with Human Evaluative Feedback","display_name":"Shaping Progressive Net of Reinforcement Learning for Policy Transfer with Human Evaluative Feedback","publication_year":2021,"publication_date":"2021-09-27","ids":{"openalex":"https://openalex.org/W4200534645","doi":"https://doi.org/10.1109/iros51168.2021.9636061"},"language":"en","primary_location":{"id":"doi:10.1109/iros51168.2021.9636061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636061","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065250586","display_name":"Rongshun Juan","orcid":"https://orcid.org/0000-0002-0840-2528"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rongshun Juan","raw_affiliation_strings":["College of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048299480","display_name":"Jie Huang","orcid":"https://orcid.org/0000-0003-1570-6797"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Huang","raw_affiliation_strings":["College of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052470310","display_name":"Randy G\u00f3mez","orcid":"https://orcid.org/0000-0002-3191-6818"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Randy Gomez","raw_affiliation_strings":["Honda Research Institute Japan Co., Ltd, Wako, Japan"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co., Ltd, Wako, Japan","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049329632","display_name":"Keisuke Nakamura","orcid":"https://orcid.org/0000-0002-4979-2083"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Nakamura","raw_affiliation_strings":["Honda Research Institute Japan Co., Ltd, Wako, Japan"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co., Ltd, Wako, Japan","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001995630","display_name":"Qixin Sha","orcid":"https://orcid.org/0000-0003-0292-3231"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qixin Sha","raw_affiliation_strings":["College of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113949081","display_name":"Bo He","orcid":null},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo He","raw_affiliation_strings":["College of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101812014","display_name":"Guangliang Li","orcid":"https://orcid.org/0000-0003-1728-5711"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangliang Li","raw_affiliation_strings":["College of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065250586"],"corresponding_institution_ids":["https://openalex.org/I59028903"],"apc_list":null,"apc_paid":null,"fwci":0.754,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.74864461,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1281","last_page":"1288"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8735669851303101},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7722715139389038},{"id":"https://openalex.org/keywords/trainer","display_name":"Trainer","score":0.6614222526550293},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6003364324569702},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5971893072128296},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5573955178260803},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4971454441547394},{"id":"https://openalex.org/keywords/interactive-learning","display_name":"Interactive Learning","score":0.46728286147117615},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4296887516975403},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4253520667552948},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.3823569715023041},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33196622133255005},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32866811752319336},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.11266860365867615},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09670701622962952}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8735669851303101},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7722715139389038},{"id":"https://openalex.org/C2780463512","wikidata":"https://www.wikidata.org/wiki/Q15122700","display_name":"Trainer","level":2,"score":0.6614222526550293},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6003364324569702},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5971893072128296},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5573955178260803},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4971454441547394},{"id":"https://openalex.org/C2776716048","wikidata":"https://www.wikidata.org/wiki/Q6045290","display_name":"Interactive Learning","level":2,"score":0.46728286147117615},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4296887516975403},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4253520667552948},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.3823569715023041},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33196622133255005},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32866811752319336},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.11266860365867615},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09670701622962952},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros51168.2021.9636061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636061","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W745775011","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W2098441518","https://openalex.org/W2099597159","https://openalex.org/W2107726111","https://openalex.org/W2110064869","https://openalex.org/W2121110499","https://openalex.org/W2145339207","https://openalex.org/W2156869222","https://openalex.org/W2171278097","https://openalex.org/W2174364281","https://openalex.org/W2257979135","https://openalex.org/W2530944449","https://openalex.org/W2605102758","https://openalex.org/W2904455790","https://openalex.org/W2944766483","https://openalex.org/W2952629144","https://openalex.org/W3004006877","https://openalex.org/W3085267010","https://openalex.org/W3101442004","https://openalex.org/W4300110528","https://openalex.org/W4302570325","https://openalex.org/W4319988532","https://openalex.org/W6622050690","https://openalex.org/W6638088447","https://openalex.org/W6674812091","https://openalex.org/W6684205842","https://openalex.org/W6685200760","https://openalex.org/W6728367041","https://openalex.org/W6728925229","https://openalex.org/W6732417791","https://openalex.org/W6783144354","https://openalex.org/W6849896277","https://openalex.org/W6966558720"],"related_works":["https://openalex.org/W4206357785","https://openalex.org/W4281381188","https://openalex.org/W3192840557","https://openalex.org/W2951211570","https://openalex.org/W4375928479","https://openalex.org/W3167935049","https://openalex.org/W3023427754","https://openalex.org/W3131673289","https://openalex.org/W3198847674","https://openalex.org/W3096913503"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,78],"learning":[2,29,63,79],"has":[3,129,146],"achieved":[4],"significant":[5],"success":[6],"in":[7,23,37,106,198,213],"many":[8],"fields,":[9],"but":[10,145],"will":[11],"confront":[12],"sampling":[13],"efficiency":[14],"and":[15,40,46,76,109,188],"safety":[16],"problems":[17],"when":[18,132],"applying":[19,210],"to":[20,32,140,154,161],"robot":[21],"control":[22],"the":[24,38,42,119,193,199,207,214],"real":[25,47,215],"world.":[26,48],"Sim-to-real":[27],"transfer":[28],"was":[30],"proposed":[31],"make":[33],"use":[34],"of":[35,202,209],"samples":[36],"simulation":[39,45],"overcome":[41],"gap":[43],"between":[44,185],"In":[49],"this":[50],"paper,":[51],"we":[52],"focus":[53],"on":[54,118,206],"improving":[55,192],"Progressive":[56,68,127,186],"Neural":[57],"Network":[58,69,128,187],"\u2014":[59],"an":[60,89,159],"effective":[61],"sim-to-real":[62],"method,":[64],"by":[65,88],"proposing":[66],"Interactive":[67],"Learning":[70],"(IPNL).":[71],"IPNL":[72,157],"integrates":[73],"progressive":[74],"network":[75],"interactive":[77,189],"(interactive":[80],"RL)":[81],"which":[82],"learns":[83],"from":[84,134,151],"evaluative":[85],"feedback":[86],"provided":[87],"observing":[90],"human":[91],"trainer.":[92],"We":[93],"test":[94],"our":[95,176,211],"method":[96,212],"using":[97],"five":[98],"RL":[99,190],"tasks":[100,135,153],"with":[101,115,136,142,167],"discrete":[102],"or":[103],"continuous":[104],"actions":[105],"OpenAI":[107],"Gym":[108],"a":[110,163,183],"sinusoids":[111],"curve":[112],"following":[113,201],"task":[114],"AUV":[116,203],"simulator":[117],"Gazebo":[120],"platform.":[121],"Our":[122,196],"results":[123,197],"suggest":[124],"that":[125,180],"while":[126],"good":[130],"performance":[131,169],"transferring":[133,150],"low-dimensional":[137,155],"state":[138],"space":[139],"those":[141],"high-dimensional":[143,152],"one":[144],"little":[147],"effect":[148],"for":[149,171,191],"ones,":[156],"allows":[158],"agent":[160],"learn":[162],"more":[164],"stable":[165],"policy":[166],"better":[168],"faster":[170],"both":[172],"cases.":[173],"More":[174],"importantly,":[175],"further":[177],"analysis":[178],"indicate":[179],"there":[181],"is":[182],"synergy":[184],"agent\u2019s":[194],"learning.":[195],"path":[200],"shed":[204],"light":[205],"potential":[208],"world":[216],"tasks.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
