{"id":"https://openalex.org/W4411725490","doi":"https://doi.org/10.1109/tsmc.2025.3578050","title":"Improving Sample Efficiency Through Stability Enhancement in Deep-Reinforcement Learning","display_name":"Improving Sample Efficiency Through Stability Enhancement in Deep-Reinforcement Learning","publication_year":2025,"publication_date":"2025-06-27","ids":{"openalex":"https://openalex.org/W4411725490","doi":"https://doi.org/10.1109/tsmc.2025.3578050"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2025.3578050","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3578050","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101862911","display_name":"Ziru Wang","orcid":"https://orcid.org/0000-0002-6068-6978"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziru Wang","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-6068-6978","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110798844","display_name":"Wanli Jiang","orcid":"https://orcid.org/0009-0005-7121-828X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanli Jiang","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0009-0005-7121-828X","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101517784","display_name":"Ru Peng","orcid":"https://orcid.org/0000-0002-2122-5263"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ru Peng","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087717420","display_name":"Qian Kou","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Kou","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0009-0003-5797-8915","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050982461","display_name":"Lipeng Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lipeng Wan","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0009-0009-5679-056X","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006277484","display_name":"Xuguang Lan","orcid":"https://orcid.org/0000-0002-3422-944X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuguang Lan","raw_affiliation_strings":["College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-3422-944X","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101862911"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":4.3465,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94176773,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"55","issue":"9","first_page":"6164","last_page":"6176"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7771999835968018,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7771999835968018,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.6934999823570251,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.6628000140190125,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.622397243976593},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5904939770698547},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5728282332420349},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5356995463371277},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35611748695373535},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2938600480556488},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.2796248197555542},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.169968843460083},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.1337474286556244},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.11799019575119019},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.07631480693817139}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.622397243976593},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5904939770698547},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5728282332420349},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5356995463371277},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35611748695373535},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2938600480556488},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.2796248197555542},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.169968843460083},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.1337474286556244},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.11799019575119019},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.07631480693817139}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2025.3578050","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3578050","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1773090561","display_name":null,"funder_award_id":"62203348","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3833898155","display_name":null,"funder_award_id":"62088102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6590685336","display_name":null,"funder_award_id":"52435010","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7207008540","display_name":null,"funder_award_id":"U23A20339","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7435956236","display_name":null,"funder_award_id":"2025SYS-SYSZD-083","funder_id":"https://openalex.org/F4320324173","funder_display_name":"Natural Science Foundation of Shaanxi Province"},{"id":"https://openalex.org/G8180423952","display_name":null,"funder_award_id":"62125305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324173","display_name":"Natural Science Foundation of Shaanxi Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1534477342","https://openalex.org/W2145339207","https://openalex.org/W2766447205","https://openalex.org/W2905334533","https://openalex.org/W3047304572","https://openalex.org/W3080884797","https://openalex.org/W3123353479","https://openalex.org/W3175558129","https://openalex.org/W3188220908","https://openalex.org/W4252279978","https://openalex.org/W4310862819","https://openalex.org/W4312314355","https://openalex.org/W4313590748","https://openalex.org/W4319792528","https://openalex.org/W4390939337","https://openalex.org/W4401018111","https://openalex.org/W6601076486","https://openalex.org/W6637162671","https://openalex.org/W6638214083","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6726983090","https://openalex.org/W6729595453","https://openalex.org/W6738248287","https://openalex.org/W6741002519","https://openalex.org/W6745375793","https://openalex.org/W6747387971","https://openalex.org/W6747473740","https://openalex.org/W6760755450","https://openalex.org/W6763177082","https://openalex.org/W6771217966","https://openalex.org/W6771750343","https://openalex.org/W6773785716","https://openalex.org/W6774948183","https://openalex.org/W6778883912","https://openalex.org/W6778993914","https://openalex.org/W6779402086","https://openalex.org/W6780135261","https://openalex.org/W6780386840","https://openalex.org/W6780571964","https://openalex.org/W6783988234","https://openalex.org/W6784434538","https://openalex.org/W6784884523","https://openalex.org/W6787713516","https://openalex.org/W6787984396","https://openalex.org/W6790330943","https://openalex.org/W6790695711","https://openalex.org/W6790934530","https://openalex.org/W6790969063","https://openalex.org/W6795534559","https://openalex.org/W6796062334","https://openalex.org/W6799458897","https://openalex.org/W6801683292","https://openalex.org/W6801801719","https://openalex.org/W6803771995","https://openalex.org/W6810081322","https://openalex.org/W6810667582","https://openalex.org/W6838266304","https://openalex.org/W6839008450","https://openalex.org/W6841908490","https://openalex.org/W6842984692","https://openalex.org/W6846376466","https://openalex.org/W6853324332","https://openalex.org/W6868649659","https://openalex.org/W7034108470","https://openalex.org/W7061746012"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Prioritizing":[0],"or":[1],"reweighting":[2],"important":[3],"samples":[4],"has":[5],"been":[6],"recognized":[7],"as":[8],"an":[9],"effective":[10],"means":[11],"of":[12,16,93],"improving":[13,89],"the":[14,48,61,81,106,113],"efficiency":[15,29,45,53,131],"deep-reinforcement":[17],"learning":[18,87,152],"(DRL)":[19],"algorithms.":[20],"However,":[21],"many":[22],"existing":[23],"techniques":[24,153],"encounter":[25],"stability":[26,75,138],"challenges,":[27],"limiting":[28],"and":[30,34,54,76,88,111,122,132],"increasing":[31],"computational":[32],"costs":[33],"training":[35,44],"time.":[36],"In":[37],"this":[38],"study,":[39],"we":[40,59],"aim":[41],"to":[42,79],"improve":[43],"by":[46,135],"exploring":[47],"intrinsic":[49],"relationship":[50],"between":[51],"sample":[52,68,130],"stability.":[55],"To":[56],"achieve":[57],"this,":[58],"propose":[60],"Stability":[62],"Contribution":[63],"Index":[64],"(SI),":[65],"which":[66],"assigns":[67],"priorities":[69],"based":[70],"on":[71,74,101],"their":[72],"impact":[73],"employs":[77],"them":[78],"weight":[80],"value":[82],"loss,":[83],"thereby":[84],"promoting":[85],"stable":[86],"efficiency.":[90],"The":[91],"effectiveness":[92],"our":[94,126,146],"method":[95,147],"is":[96],"validated":[97],"through":[98],"comprehensive":[99],"experiments":[100],"two":[102],"distinct":[103],"benchmarks:":[104],"1)":[105],"continuous":[107],"control":[108,115],"domain":[109],"DMControl":[110],"2)":[112],"discrete":[114],"environment":[116],"ProcGen.":[117],"Compatible":[118],"with":[119],"both":[120],"off-policy":[121],"on-policy":[123],"DRL":[124],"algorithms,":[125],"approach":[127],"significantly":[128],"improves":[129],"overall":[133],"performance":[134],"fostering":[136],"greater":[137],"during":[139],"training.":[140],"Additionally,":[141],"experimental":[142],"results":[143],"show":[144],"that":[145],"outperforms":[148],"well-established":[149],"sample-efficient":[150],"reinforcement":[151],"across":[154],"multiple":[155],"settings.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
