{"id":"https://openalex.org/W4415428206","doi":"https://doi.org/10.3233/faia251129","title":"Training Robotic Self-Evolving with GRPO","display_name":"Training Robotic Self-Evolving with GRPO","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428206","doi":"https://doi.org/10.3233/faia251129"},"language":null,"primary_location":{"id":"doi:10.3233/faia251129","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251129","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251129","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010254162","display_name":"Q. B. Yi","orcid":"https://orcid.org/0009-0001-7168-3481"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qinpeng Yi","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101801999","display_name":"Ping Zhang","orcid":"https://orcid.org/0000-0003-0803-5462"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ping Zhang","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100767753","display_name":"Junwei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junwei Chen","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5010254162"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67239691,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9750999808311462,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9750999808311462,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6940000057220459},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6499999761581421},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6288999915122986},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5356000065803528},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.5328999757766724},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.49160000681877136},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4198000133037567}],"concepts":[{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6940000057220459},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6499999761581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6492999792098999},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6288999915122986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6128000020980835},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5356000065803528},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.5328999757766724},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.49160000681877136},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4447999894618988},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.39590001106262207},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3928999900817871},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.3725999891757965},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33149999380111694},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C56397880","wikidata":"https://www.wikidata.org/wiki/Q6044094","display_name":"Intelligent decision support system","level":2,"score":0.2513999938964844},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251129","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251129","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251129","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251129","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"embodied":[1],"robots":[2,24,154],"heavily":[3],"depend":[4],"on":[5,127],"pre-trained":[6,37],"models,":[7],"whose":[8],"capabilities":[9],"are":[10,25],"inherently":[11],"constrained":[12],"by":[13],"the":[14,42,54,67,95,104,112,138,149,160,185],"data":[15,88,133],"they":[16],"were":[17],"originally":[18],"trained":[19],"on.":[20],"However,":[21],"truly":[22],"intelligent":[23,153],"expected":[26],"to":[27,80,85,93,115,130],"improve":[28,157],"themselves":[29,158],"autonomously":[30],"when":[31],"encountering":[32],"novel":[33,123],"environments":[34,84],"where":[35],"these":[36],"models":[38],"fall":[39],"short.This":[40],"is":[41,101],"capability":[43],"we":[44,52,62,120,166],"define":[45],"as":[46],"self-evolving":[47,55,170],"ability.":[48],"In":[49],"this":[50,64],"paper,":[51],"investigate":[53],"capacity":[56],"of":[57,140,151,162,187],"robotic":[58,169],"vision":[59,96],"models.":[60],"Specifically,":[61],"simulate":[63],"process":[65],"using":[66],"R3ED":[68],"dataset":[69],"and":[70,107,174,184],"propose":[71],"a":[72,77,122,172],"training":[73,99,144],"framework":[74],"in":[75],"which":[76],"policy":[78,113],"learns":[79],"navigate":[81],"through":[82],"unfamiliar":[83],"collect":[86],"informative":[87],"that":[89,155,168],"can":[90,156],"be":[91],"used":[92],"refine":[94],"model.":[97],"Our":[98,146],"pipeline":[100],"built":[102],"upon":[103],"GRPO":[105],"algorithm":[106],"incorporates":[108],"historical":[109],"states":[110],"into":[111],"design":[114],"enhance":[116],"contextual":[117],"awareness.":[118],"Furthermore,":[119],"introduce":[121],"reward":[124],"mechanism":[125],"based":[126],"supervision":[128],"discrepancy":[129],"guide":[131],"effective":[132],"collection.":[134],"Experimental":[135],"results":[136],"validate":[137],"effectiveness":[139],"our":[141],"proposed":[142],"reinforcement":[143],"strategy.":[145],"work":[147],"highlights":[148],"potential":[150],"designing":[152],"without":[159],"intervene":[161],"human":[163],"beings.":[164],"Nevertheless,":[165],"acknowledge":[167],"remains":[171],"nascent":[173],"underexplored":[175],"area,":[176],"with":[177],"significant":[178],"room":[179],"for":[180],"further":[181],"future":[182],"research":[183],"discovery":[186],"more":[188],"optimal":[189],"approaches.":[190]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
