{"id":"https://openalex.org/W4412623290","doi":"https://doi.org/10.1109/tai.2025.3592174","title":"Balanced Sampling and Reusing Imaginary Data for World Models in Reinforcement Learning","display_name":"Balanced Sampling and Reusing Imaginary Data for World Models in Reinforcement Learning","publication_year":2025,"publication_date":"2025-07-24","ids":{"openalex":"https://openalex.org/W4412623290","doi":"https://doi.org/10.1109/tai.2025.3592174"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2025.3592174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3592174","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qianyu Wang","orcid":"https://orcid.org/0009-0000-3231-9743"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qianyu Wang","raw_affiliation_strings":["School of Computer Science, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084203901","display_name":"Xuekai Wei","orcid":"https://orcid.org/0000-0002-3761-1759"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuekai Wei","raw_affiliation_strings":["School of Computer Science, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091335601","display_name":"Jielu Yan","orcid":"https://orcid.org/0000-0001-8342-7453"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jielu Yan","raw_affiliation_strings":["School of Computer Science, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035706700","display_name":"Leong Hou U","orcid":"https://orcid.org/0000-0002-5135-5165"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]},{"id":"https://openalex.org/I6469544","display_name":"City University of Macau","ror":"https://ror.org/04gpd4q15","country_code":"MO","type":"education","lineage":["https://openalex.org/I6469544"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Leong Hou U","raw_affiliation_strings":["Department of Computer and Information Science, State Key Laboratory of Internet of Things for Smart City, Centre for Data Science, University of Macau, Macau, China","State Key Laboratory of Internet of Things for Smart City, Centre for Data Science, Department of Computer and Information Science, University of Macau, Macau, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer and Information Science, State Key Laboratory of Internet of Things for Smart City, Centre for Data Science, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]},{"raw_affiliation_string":"State Key Laboratory of Internet of Things for Smart City, Centre for Data Science, Department of Computer and Information Science, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033390427","display_name":"Huayan Pu","orcid":"https://orcid.org/0000-0001-9830-3955"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huayan Pu","raw_affiliation_strings":["State Key Laboratory of Mechanical Transmissions, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mechanical Transmissions, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jun Luo","orcid":"https://orcid.org/0000-0003-1314-5631"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Luo","raw_affiliation_strings":["State Key Laboratory of Mechanical Transmissions, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mechanical Transmissions, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101752580","display_name":"Weijia Jia","orcid":"https://orcid.org/0000-0003-1000-3937"},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]},{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weijia Jia","raw_affiliation_strings":["BNU-UIC Institute of Artificial Intelligence and Future Networks, Beijing Normal University and Guangdong Key Laboratory of AI Multi-Modal Data Processing, BNU-HKBU United International College, Zhuhai, Guangdong, China","BNU-UIC Institute of Artificial Intelligence and Future Networks Beijing Normal University, Zhuhai, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"BNU-UIC Institute of Artificial Intelligence and Future Networks, Beijing Normal University and Guangdong Key Laboratory of AI Multi-Modal Data Processing, BNU-HKBU United International College, Zhuhai, Guangdong, China","institution_ids":["https://openalex.org/I12615008"]},{"raw_affiliation_string":"BNU-UIC Institute of Artificial Intelligence and Future Networks Beijing Normal University, Zhuhai, Guangdong, China","institution_ids":["https://openalex.org/I25254941","https://openalex.org/I12615008"]}]},{"author_position":"last","author":{"id":null,"display_name":"Mingliang Zhou","orcid":"https://orcid.org/0000-0002-1874-3641"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingliang Zhou","raw_affiliation_strings":["School of Computer Science, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I158842170"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08766515,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"2","first_page":"1118","last_page":"1130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6194000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6194000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/the-imaginary","display_name":"The Imaginary","score":0.8241287469863892},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.713512659072876},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6392769813537598},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6044107675552368},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5309469103813171},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4715464413166046},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3677595257759094},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2715613842010498},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10550722479820251},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08016863465309143},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07873713970184326},{"id":"https://openalex.org/keywords/psychoanalysis","display_name":"Psychoanalysis","score":0.07038483023643494}],"concepts":[{"id":"https://openalex.org/C135068731","wikidata":"https://www.wikidata.org/wiki/Q1169049","display_name":"The Imaginary","level":2,"score":0.8241287469863892},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.713512659072876},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6392769813537598},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6044107675552368},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5309469103813171},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4715464413166046},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3677595257759094},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2715613842010498},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10550722479820251},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08016863465309143},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07873713970184326},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.07038483023643494},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2025.3592174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3592174","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2889962487","display_name":null,"funder_award_id":"62176027","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1993411524","https://openalex.org/W2064675550","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2761873684","https://openalex.org/W2798494119","https://openalex.org/W2950635152","https://openalex.org/W2989847975","https://openalex.org/W3035965352","https://openalex.org/W3100366369","https://openalex.org/W3175558129","https://openalex.org/W3184846191","https://openalex.org/W4252279978","https://openalex.org/W4312277666","https://openalex.org/W4366493064","https://openalex.org/W4385245566","https://openalex.org/W4385430550","https://openalex.org/W4393032096"],"related_works":["https://openalex.org/W3016733773","https://openalex.org/W2491088243","https://openalex.org/W1979400567","https://openalex.org/W2353064048","https://openalex.org/W2000265659","https://openalex.org/W2754367428","https://openalex.org/W4389942819","https://openalex.org/W2982365196","https://openalex.org/W4309512917","https://openalex.org/W2390332902"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,96],"learning":[2,97],"(DRL)":[3],"has":[4],"shown":[5],"significant":[6],"success":[7],"in":[8,30,35,70,181],"domains":[9],"such":[10,64],"as":[11,65],"computer":[12],"vision":[13],"and":[14,54,74,84,101,128,144,166,175,187],"robot":[15],"control.":[16],"However,":[17],"DRL":[18,179],"agents":[19],"often":[20],"suffer":[21],"from":[22],"low":[23],"sample":[24,164,185],"efficiency,":[25],"limiting":[26],"their":[27,58],"practical":[28],"applicability":[29],"industrial":[31],"settings.":[32],"Recent":[33],"advances":[34],"model-based":[36,39],"DRL,":[37],"particularly":[38],"approaches,":[40],"have":[41],"sought":[42],"to":[43,51,82,122],"address":[44],"this":[45],"issue":[46],"by":[47,140],"leveraging":[48],"imaginary":[49,77,103,132,149],"data":[50,104,133],"improve":[52],"decision-making":[53],"sampling":[55,100,114,120],"efficiency.":[56],"Despite":[57],"promise,":[59],"these":[60,90],"methods":[61],"face":[62],"challenges":[63],"overreliance":[66],"on":[67,154],"early":[68,126],"experiences":[69,127],"the":[71,146,155],"replay":[72],"buffer":[73],"under-utilization":[75],"of":[76,148],"data,":[78],"which":[79,106],"can":[80],"lead":[81],"overfitting":[83],"suboptimal":[85],"policy":[86,138],"optimization.":[87],"To":[88],"overcome":[89],"limitations,":[91],"we":[92],"propose":[93],"a":[94,112,130,173],"novel":[95],"framework,":[98],"balanced":[99,113],"reusing":[102,131],"(BSRID),":[105],"introduces":[107],"two":[108],"key":[109],"innovations:":[110],"(1)":[111],"(BS)":[115],"mechanism":[116],"that":[117,136,160],"ensures":[118],"uniform":[119],"rates":[121],"mitigate":[123],"bias":[124],"toward":[125],"(2)":[129],"(RID)":[134],"strategy":[135],"enhances":[137],"optimization":[139],"increasing":[141],"update":[142],"frequency":[143],"maximizing":[145],"utility":[147],"data.":[150],"The":[151,191],"experimental":[152],"results":[153],"Atari":[156],"100k":[157],"benchmark":[158],"demonstrate":[159],"BSRID":[161],"significantly":[162],"improves":[163],"efficiency":[165,186],"achieves":[167],"state-of-the-art":[168],"performance.":[169],"This":[170],"work":[171],"provides":[172],"robust":[174],"efficient":[176],"solution":[177],"for":[178],"applications":[180],"scenarios":[182],"requiring":[183],"high":[184],"reliable":[188],"decision":[189],"making.":[190],"code":[192],"is":[193],"available":[194],"at":[195],"<uri":[196],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[197],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/wwwqqyy/BSRID</uri>.":[198]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
