{"id":"https://openalex.org/W3132175036","doi":"https://doi.org/10.1109/lra.2021.3061372","title":"A Coach-Based Bayesian Reinforcement Learning Method for Snake Robot Control","display_name":"A Coach-Based Bayesian Reinforcement Learning Method for Snake Robot Control","publication_year":2021,"publication_date":"2021-02-23","ids":{"openalex":"https://openalex.org/W3132175036","doi":"https://doi.org/10.1109/lra.2021.3061372","mag":"3132175036"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2021.3061372","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2021.3061372","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100669026","display_name":"Yuanyuan Jia","orcid":"https://orcid.org/0000-0001-9880-5090"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuanyuan Jia","raw_affiliation_strings":["Department of Robotics, Ritsumeikan University, Shiga, Japan"],"raw_orcid":"https://orcid.org/0000-0001-9880-5090","affiliations":[{"raw_affiliation_string":"Department of Robotics, Ritsumeikan University, Shiga, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100778162","display_name":"Shugen Ma","orcid":"https://orcid.org/0000-0003-1155-8969"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shugen Ma","raw_affiliation_strings":["Department of Robotics, Ritsumeikan University, Shiga, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1155-8969","affiliations":[{"raw_affiliation_string":"Department of Robotics, Ritsumeikan University, Shiga, Japan","institution_ids":["https://openalex.org/I135768898"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I135768898"],"apc_list":null,"apc_paid":null,"fwci":3.3835,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.92469199,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":"6","issue":"2","first_page":"2319","last_page":"2326"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.9697999954223633,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9620000123977661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7857633829116821},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6632471084594727},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.654448390007019},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5748152732849121},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5119744539260864},{"id":"https://openalex.org/keywords/obstacle-avoidance","display_name":"Obstacle avoidance","score":0.46659019589424133},{"id":"https://openalex.org/keywords/robot-control","display_name":"Robot control","score":0.4519486427307129},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43357226252555847},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.4199155569076538},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41559261083602905},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.4127490520477295},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.35280758142471313},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.3165079355239868},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2226230502128601}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7857633829116821},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6632471084594727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.654448390007019},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5748152732849121},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5119744539260864},{"id":"https://openalex.org/C6683253","wikidata":"https://www.wikidata.org/wiki/Q7075535","display_name":"Obstacle avoidance","level":4,"score":0.46659019589424133},{"id":"https://openalex.org/C65401140","wikidata":"https://www.wikidata.org/wiki/Q7353385","display_name":"Robot control","level":4,"score":0.4519486427307129},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43357226252555847},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.4199155569076538},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41559261083602905},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.4127490520477295},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.35280758142471313},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.3165079355239868},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2226230502128601},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2021.3061372","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2021.3061372","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320335401","display_name":"Ritsumeikan Global Innovation Research Organization, Ritsumeikan University","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W186521003","https://openalex.org/W582134693","https://openalex.org/W1573670163","https://openalex.org/W1739716134","https://openalex.org/W1964258484","https://openalex.org/W1964941426","https://openalex.org/W2011360287","https://openalex.org/W2043218551","https://openalex.org/W2057699393","https://openalex.org/W2081505722","https://openalex.org/W2099428832","https://openalex.org/W2108002878","https://openalex.org/W2140135625","https://openalex.org/W2160067530","https://openalex.org/W2530907443","https://openalex.org/W2593272389","https://openalex.org/W2785361559","https://openalex.org/W2794308328","https://openalex.org/W2887791231","https://openalex.org/W2889546910","https://openalex.org/W2891052160","https://openalex.org/W2912971983","https://openalex.org/W2963873508","https://openalex.org/W2964043796","https://openalex.org/W2964059111","https://openalex.org/W2991570803","https://openalex.org/W3100789280","https://openalex.org/W3103532359","https://openalex.org/W3123298421","https://openalex.org/W4388323202","https://openalex.org/W6617145748","https://openalex.org/W6680657880","https://openalex.org/W6683935339","https://openalex.org/W6692846177","https://openalex.org/W6754173080","https://openalex.org/W6779886872"],"related_works":["https://openalex.org/W4223607701","https://openalex.org/W4220992570","https://openalex.org/W3025934274","https://openalex.org/W1560187912","https://openalex.org/W2694175881","https://openalex.org/W2965554769","https://openalex.org/W2034476524","https://openalex.org/W2279406337","https://openalex.org/W2662536592","https://openalex.org/W2131994932"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"usually":[3],"needs":[4],"thousands":[5],"of":[6,32,35,82],"episodes,":[7],"leading":[8],"its":[9],"applications":[10],"on":[11,112],"physical":[12],"robots":[13],"expensive":[14],"and":[15,90,103,115],"challenging.":[16],"Little":[17],"research":[18],"has":[19,109],"been":[20,110],"reported":[21],"about":[22],"snake":[23,45,95],"robot":[24,46,96],"control":[25,97],"using":[26],"RL":[27,75],"due":[28],"to":[29,72,98],"additional":[30],"difficulty":[31],"high":[33],"redundancy":[34],"freedom.":[36],"We":[37],"propose":[38],"a":[39,63,69],"coach-based":[40],"deep":[41],"learning":[42],"method":[43],"for":[44,94],"control,":[47],"which":[48],"can":[49],"effectively":[50],"save":[51],"convergence":[52],"time":[53],"with":[54,85,120],"much":[55],"less":[56],"episodes.":[57],"The":[58,107],"main":[59],"contributions":[60],"include:":[61],"1)":[62],"unified":[64],"graph-based":[65],"Bayesian":[66],"framework":[67],"integrating":[68],"coach":[70],"module":[71],"guide":[73],"the":[74],"agent;":[76],"2)":[77],"an":[78,88],"explicit":[79],"stochastic":[80],"formulation":[81],"robot-environment":[83],"interaction":[84],"uncertainty;":[86],"3)":[87],"efficient":[89],"robust":[91],"training":[92],"process":[93],"achieve":[99],"both":[100,113],"path":[101],"planning":[102],"obstacle":[104],"avoidance":[105],"simultaneously.":[106],"performance":[108],"demonstrated":[111],"simulation":[114],"real-world":[116],"data":[117],"in":[118],"comparison":[119],"state-of-the-art,":[121],"showing":[122],"promising":[123],"results.":[124]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}