{"id":"https://openalex.org/W4402351473","doi":"https://doi.org/10.1109/ijcnn60899.2024.10651451","title":"High-quality Synthetic Data is Efficient for Model-based Offline Reinforcement Learning","display_name":"High-quality Synthetic Data is Efficient for Model-based Offline Reinforcement Learning","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402351473","doi":"https://doi.org/10.1109/ijcnn60899.2024.10651451"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10651451","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10651451","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101736381","display_name":"Qichao Zhang","orcid":"https://orcid.org/0000-0002-8278-7083"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qichao Zhang","raw_affiliation_strings":["Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102871890","display_name":"Xing Fang","orcid":"https://orcid.org/0000-0002-1801-6831"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Fang","raw_affiliation_strings":["Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084215678","display_name":"Kaixuan Xu","orcid":"https://orcid.org/0009-0000-0360-6153"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaixuan Xu","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Artificial Intelligence,Beijing,China,100049"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Artificial Intelligence,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088989949","display_name":"Weixin Zhao","orcid":"https://orcid.org/0000-0002-4661-0707"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weixin Zhao","raw_affiliation_strings":["ZHEJIANG SUPCON TECHNOLOGY CO., LTD,Hangzhou,China,310059"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ZHEJIANG SUPCON TECHNOLOGY CO., LTD,Hangzhou,China,310059","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327745","display_name":"Haoran Li","orcid":"https://orcid.org/0000-0003-2559-9585"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoran Li","raw_affiliation_strings":["Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation,Beijing,China,100190","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3055,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.6437864,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9635000228881836,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8025062680244446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7633423209190369},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5424931049346924},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.525395929813385},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5033230185508728},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.41421768069267273},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10719680786132812}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8025062680244446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7633423209190369},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5424931049346924},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.525395929813385},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5033230185508728},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.41421768069267273},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10719680786132812},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10651451","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10651451","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1959608418","https://openalex.org/W2167224731","https://openalex.org/W2187089797","https://openalex.org/W2939569248","https://openalex.org/W2947150733","https://openalex.org/W2966477753","https://openalex.org/W2991355586","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3025606523","https://openalex.org/W3028766998","https://openalex.org/W3033324992","https://openalex.org/W3169291081","https://openalex.org/W3172360140","https://openalex.org/W3201700917","https://openalex.org/W3202125656","https://openalex.org/W3203827806","https://openalex.org/W3206246732","https://openalex.org/W3212041118","https://openalex.org/W4221146510","https://openalex.org/W4221161853","https://openalex.org/W4226440390","https://openalex.org/W4282813201","https://openalex.org/W4283076713","https://openalex.org/W4285604474","https://openalex.org/W4306818327","https://openalex.org/W4307774253","https://openalex.org/W4308080451","https://openalex.org/W4360584316","https://openalex.org/W4387171559","https://openalex.org/W6640963894","https://openalex.org/W6684338915","https://openalex.org/W6684921986","https://openalex.org/W6748839928","https://openalex.org/W6757469721","https://openalex.org/W6763704811","https://openalex.org/W6771270455","https://openalex.org/W6774583691","https://openalex.org/W6776438516","https://openalex.org/W6776601253","https://openalex.org/W6777091672","https://openalex.org/W6777656069","https://openalex.org/W6779265984","https://openalex.org/W6791413555","https://openalex.org/W6796289742","https://openalex.org/W6796589144","https://openalex.org/W6799150178","https://openalex.org/W6801728237","https://openalex.org/W6801801719","https://openalex.org/W6802659552","https://openalex.org/W6802704075","https://openalex.org/W6803888866","https://openalex.org/W6810669889","https://openalex.org/W6810865680","https://openalex.org/W6838277686","https://openalex.org/W6838356327","https://openalex.org/W6838483015","https://openalex.org/W6838958960","https://openalex.org/W6840064209","https://openalex.org/W6841549819","https://openalex.org/W6843816287","https://openalex.org/W6846142413","https://openalex.org/W6856947211"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Recent":[0],"work":[1],"has":[2],"found":[3],"that":[4,128,168],"two":[5],"types":[6],"of":[7,53,63,150,178],"dataset":[8],"characteristics":[9],"including":[10],"the":[11,26,39,51,138,148,164,176],"dataset\u2019s":[12,40],"coverage":[13,41],"and":[14,46,85,100,116,197],"data":[15,36,68,94,127,146,171],"quality":[16,56],"are":[17],"critical":[18],"for":[19,71,121],"offline":[20,29,72,133,158,199],"reinforcement":[21],"learning":[22],"(RL).":[23],"To":[24],"improve":[25],"policy,":[27],"model-based":[28,198],"RL":[30,73,159,200],"tries":[31],"to":[32,37,97,111,124],"generate":[33,98,112,143],"reliable":[34,101,122],"synthetic":[35,54,67,102,126,145,170],"expand":[38],"based":[42],"on":[43,147,163],"trained":[44],"forward":[45,82,109],"backward":[47,86,119],"dynamics":[48],"models.":[49],"However,":[50],"characteristic":[52],"data\u2019s":[55],"is":[57,69,91],"ignoring,":[58],"which":[59,90,152],"raises":[60],"a":[61,80,107,118,179,187],"question":[62],"whether":[64],"augmenting":[65],"high-quality":[66,99,113,144,169],"efficient":[70],"agents.":[74],"Motivated":[75],"by":[76,173],"this,":[77],"we":[78,105],"propose":[79],"novel":[81],"High-quality":[83],"Imagination":[84],"Reliable":[87],"Check":[88],"(HIRC),":[89],"an":[92],"effective":[93],"augmentation":[95],"method":[96,141],"data.":[103],"Specifically,":[104],"construct":[106],"value-guided":[108],"model":[110,120],"imaginary":[114],"trajectories,":[115],"employ":[117],"checking":[123],"obtain":[125],"better":[129,191],"match":[130],"with":[131,156,185],"pre-collected":[132],"transitions.":[134],"In":[135],"other":[136],"words,":[137],"proposed":[139],"HIRC":[140,174,184],"can":[142,153],"premise":[149],"reliability,":[151],"be":[154],"combined":[155],"model-free":[157,196],"methods.":[160,201],"Experimental":[161],"results":[162],"D4RL":[165],"benchmark":[166],"demonstrate":[167],"generated":[172],"boosts":[175],"performance":[177],"base":[180,188],"agent":[181,189],"TD3_BC.":[182],"Especially,":[183],"such":[186],"achieves":[190],"scores":[192],"against":[193],"recent":[194],"popular":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
