{"id":"https://openalex.org/W4413917070","doi":"https://doi.org/10.1109/icra55743.2025.11127726","title":"Automated Hybrid Reward Scheduling Via Large Language Models for Robotic Skill Learning","display_name":"Automated Hybrid Reward Scheduling Via Large Language Models for Robotic Skill Learning","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413917070","doi":"https://doi.org/10.1109/icra55743.2025.11127726"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11127726","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11127726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039370698","display_name":"Changxin Huang","orcid":"https://orcid.org/0000-0003-2532-1663"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changxin Huang","raw_affiliation_strings":["Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115595981","display_name":"Junyang Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyang Liang","raw_affiliation_strings":["Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036344269","display_name":"Yanbin Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanbin Chang","raw_affiliation_strings":["Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018133279","display_name":"Jingzhao Xu","orcid":"https://orcid.org/0000-0001-9926-3414"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingzhao Xu","raw_affiliation_strings":["Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061","institution_ids":["https://openalex.org/I4210152380"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100393867","display_name":"Jianqiang Li","orcid":"https://orcid.org/0000-0001-9800-5165"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiang Li","raw_affiliation_strings":["Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen University,National Engineering Laboratory for Big Data System Computing Technology,Shenzhen,China,518061","institution_ids":["https://openalex.org/I4210152380"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7588,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88344702,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"8242","last_page":"8248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6581000089645386,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6581000089645386,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.5958999991416931,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7819919586181641},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5628829002380371},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48458635807037354},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33035579323768616},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09751978516578674}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7819919586181641},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5628829002380371},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48458635807037354},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33035579323768616},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09751978516578674},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11127726","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11127726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1149790710","display_name":null,"funder_award_id":"6240020443,62073225,62203134","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2754517384","https://openalex.org/W3207033168","https://openalex.org/W3207612633","https://openalex.org/W4309669389","https://openalex.org/W4383097638","https://openalex.org/W4383108457","https://openalex.org/W4383108491","https://openalex.org/W4385430679","https://openalex.org/W4388660746","https://openalex.org/W4389352485","https://openalex.org/W4394674699","https://openalex.org/W4399374221","https://openalex.org/W4401414856","https://openalex.org/W4401415240","https://openalex.org/W4401415287","https://openalex.org/W4401416363","https://openalex.org/W4401553778"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Enabling":[0],"a":[1,9,25,124,132,141,152,173,188],"high-degree-of-freedom":[2,226],"robot":[3],"to":[4,13,39,55,120,140],"learn":[5],"specific":[6],"skills":[7,122],"is":[8,76,150],"challenging":[10],"task":[11,181],"due":[12],"the":[14,33,57,80,106,114,180,193,201,210],"complexity":[15],"of":[16,35,69,109,203],"robotic":[17,45,227],"dynamics.":[18],"Reinforcement":[19],"learning":[20,82,107],"(RL)":[21],"has":[22],"emerged":[23],"as":[24],"promising":[26],"solution;":[27],"however,":[28],"addressing":[29],"such":[30],"problems":[31],"requires":[32],"design":[34,131],"multiple":[36,225],"reward":[37,52,71,111,143],"functions":[38],"account":[40],"for":[41],"various":[42],"constraints":[43],"in":[44,73,123,176],"motion.":[46],"Existing":[47],"approaches":[48],"typically":[49],"sum":[50],"all":[51,70],"components":[53,72],"indiscriminately":[54],"optimize":[56],"RL":[58],"value":[59,134],"function":[60],"and":[61,78,126,158,183],"policy.":[62],"We":[63],"argue":[64],"that":[65,154,199,209],"this":[66],"uniform":[67],"inclusion":[68],"policy":[74,115,146],"optimization":[75,116],"inefficient":[77],"limits":[79],"robot's":[81],"performance.":[83],"To":[84],"address":[85],"this,":[86],"we":[87,130],"propose":[88],"an":[89,214],"Automated":[90],"Hybrid":[91],"Reward":[92],"Scheduling":[93],"(AHRS)":[94],"framework":[95],"based":[96,164,195],"on":[97,165,196],"Large":[98],"Language":[99],"Models":[100],"(LLMs).":[101],"This":[102],"paradigm":[103],"dynamically":[104],"adjusts":[105],"intensity":[108],"each":[110,137,148,204],"component":[112],"throughout":[113],"process,":[117],"enabling":[118],"robots":[119],"acquire":[121],"gradual":[125],"structured":[127],"manner.":[128],"Specifically,":[129],"multi-branch":[133],"network,":[135],"where":[136],"branch":[138,149],"corresponds":[139],"distinct":[142],"component.":[144],"During":[145],"optimization,":[147],"assigned":[151],"weight":[153,189],"reflects":[155],"its":[156],"importance,":[157],"these":[159],"weights":[160],"are":[161],"automatically":[162],"computed":[163],"rules":[166],"designed":[167],"by":[168],"LLMs.":[169],"The":[170],"LLM":[171],"generates":[172],"rule":[174,191],"set":[175],"advance,":[177],"derived":[178],"from":[179,192],"description,":[182],"during":[184],"training,":[185],"it":[186],"selects":[187],"calculation":[190],"library":[194],"language":[197],"prompts":[198],"evaluate":[200],"performance":[202,222],"branch.":[205],"Experimental":[206],"results":[207],"demonstrate":[208],"AHRS":[211],"method":[212],"achieves":[213],"average":[215],"<tex":[216],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[217],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{6.":[218],"4":[219],"8":[220],"\\%}$</tex>":[221],"improvement":[223],"across":[224],"tasks.":[228]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
