{"id":"https://openalex.org/W4401416469","doi":"https://doi.org/10.1109/icra57147.2024.10610992","title":"Multi-Level Progressive Reinforcement Learning for Control Policy in Physical Simulations","display_name":"Multi-Level Progressive Reinforcement Learning for Control Policy in Physical Simulations","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401416469","doi":"https://doi.org/10.1109/icra57147.2024.10610992"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062634448","display_name":"Kefei Wu","orcid":"https://orcid.org/0000-0001-5733-7753"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kefei Wu","raw_affiliation_strings":["ShanghaiTech University"],"affiliations":[{"raw_affiliation_string":"ShanghaiTech University","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015970030","display_name":"Xuming He","orcid":"https://orcid.org/0000-0003-2150-1237"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuming He","raw_affiliation_strings":["ShanghaiTech University"],"affiliations":[{"raw_affiliation_string":"ShanghaiTech University","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100784862","display_name":"Yang Wang","orcid":"https://orcid.org/0000-0003-2596-4933"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["ShanghaiTech University"],"affiliations":[{"raw_affiliation_string":"ShanghaiTech University","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024040216","display_name":"Xiaopei Liu","orcid":"https://orcid.org/0000-0003-4518-618X"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaopei Liu","raw_affiliation_strings":["ShanghaiTech University"],"affiliations":[{"raw_affiliation_string":"ShanghaiTech University","institution_ids":["https://openalex.org/I30809798"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062634448"],"corresponding_institution_ids":["https://openalex.org/I30809798"],"apc_list":null,"apc_paid":null,"fwci":0.3637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.65455143,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"9502","last_page":"9508"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.9585999846458435,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8204402923583984},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6757947206497192},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5482443571090698},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36765575408935547}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8204402923583984},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757947206497192},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5482443571090698},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36765575408935547}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W64088143","https://openalex.org/W1498436455","https://openalex.org/W1823791555","https://openalex.org/W2142092882","https://openalex.org/W2615547864","https://openalex.org/W2786928559","https://openalex.org/W2788239209","https://openalex.org/W2904246096","https://openalex.org/W2962872206","https://openalex.org/W2975445440","https://openalex.org/W2988458564","https://openalex.org/W2996831576","https://openalex.org/W3046419021","https://openalex.org/W3048472464","https://openalex.org/W3093922502","https://openalex.org/W3104876774","https://openalex.org/W3124349599","https://openalex.org/W3170969988","https://openalex.org/W3213974477","https://openalex.org/W4285102606","https://openalex.org/W4285159093","https://openalex.org/W4291143874","https://openalex.org/W4293542549","https://openalex.org/W4319988532","https://openalex.org/W6638668618","https://openalex.org/W6692846177","https://openalex.org/W6694756022","https://openalex.org/W6747473740","https://openalex.org/W6748554570","https://openalex.org/W6748638692","https://openalex.org/W6748866174","https://openalex.org/W6755437240","https://openalex.org/W6757592117","https://openalex.org/W6780559895","https://openalex.org/W6781494318","https://openalex.org/W6796791985","https://openalex.org/W6800004206","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109","https://openalex.org/W4362501864","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Training":[0],"model-free":[1],"intelligent":[2],"agents":[3],"in":[4,29,108,121,211,214],"complex":[5],"real-world":[6],"scenarios":[7],"using":[8],"reinforcement":[9],"learning":[10,147,189,215],"(RL)":[11],"often":[12],"necessitates":[13],"simulation-based":[14],"environments":[15],"due":[16],"to":[17,37,46,64,71,124,218],"high":[18],"physical":[19,204],"expenses.":[20],"However,":[21],"when":[22],"simulation":[23,34,179],"takes":[24],"a":[25,50,58,86,104,109,155,161,177,192,209,219],"long":[26],"time,":[27],"e.g.,":[28],"an":[30,197],"unsteady":[31],"3D":[32],"fluid":[33],"with":[35,130,160],"interactions":[36],"the":[38,68,77,82,146,169],"controllable":[39],"solids,":[40],"existing":[41],"RL":[42,63,117,141,175,221],"algorithms":[43],"meet":[44],"difficulty":[45],"accomplish":[47],"training":[48],"within":[49],"reasonable":[51],"timeframes.":[52],"In":[53],"this":[54,73],"paper,":[55],"we":[56,153],"propose":[57],"novel":[59,156],"multi-level":[60,157],"framework":[61,183],"for":[62,176],"accelerate":[65],"convergence":[66],"as":[67],"first":[69],"attempt":[70],"address":[72],"difficulty.":[74],"Motivated":[75],"by":[76,173,186],"idea":[78],"of":[79,106,168],"multi-grid":[80],"solver,":[81],"control":[83,127,226],"policy":[84,188],"on":[85,191],"virtual":[87,193],"agent":[88],"over":[89],"time":[90,216],"can":[91,99],"be":[92,100],"decomposed":[93],"into":[94],"different":[95],"frequency":[96,128,136],"levels,":[97],"which":[98],"progressively":[101],"learned":[102,172],"via":[103],"set":[105],"simulations":[107,123],"coarse-to-fine":[110],"manner.":[111],"It":[112],"is":[113,171,184],"expected":[114],"that":[115],"most":[116],"trials":[118],"are":[119],"performed":[120],"coarser":[122],"learn":[125],"lower":[126],"levels":[129,137],"more":[131],"efficient":[132],"convergence,":[133],"while":[134,223],"higher":[135],"require":[138],"much":[139],"less":[140],"trials,":[142],"thus":[143],"significantly":[144],"accelerating":[145],"process.":[148],"To":[149],"implement":[150],"our":[151],"idea,":[152],"designed":[154],"residual":[158],"network":[159,170],"filter":[162],"module":[163],"attached,":[164],"where":[165],"each":[166],"level":[167],"performing":[174],"given":[178],"resolution.":[180],"The":[181],"proposed":[182],"evaluated":[185],"conducting":[187],"experiments":[190],"aerial":[194],"(2D)":[195],"and":[196],"underwater":[198],"(3D)":[199],"robot,":[200],"both":[201],"requiring":[202],"time-consuming":[203],"simulations.":[205],"Our":[206],"results":[207],"demonstrate":[208],"decrease":[210],"almost":[212],"half":[213],"compared":[217],"direct":[220],"approach,":[222],"achieving":[224],"similar":[225],"performance.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
