{"id":"https://openalex.org/W4392903507","doi":"https://doi.org/10.1109/icassp48485.2024.10446392","title":"CDA-MBPO:Corrected Data Aggregation for Model-Based Policy Optimization","display_name":"CDA-MBPO:Corrected Data Aggregation for Model-Based Policy Optimization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903507","doi":"https://doi.org/10.1109/icassp48485.2024.10446392"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446392","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054600540","display_name":"Du Xin","orcid":"https://orcid.org/0000-0002-5348-4438"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Du","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Electronics and Information Engineering, Suzhou University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101902909","display_name":"Shan Zhong","orcid":"https://orcid.org/0000-0003-0034-6952"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Suzhou University of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shan Zhong","raw_affiliation_strings":["Changshu Institute of Technology,School of Computer Science and Engineering","School of Computer Science and Engineering, Changshu Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Changshu Institute of Technology,School of Computer Science and Engineering","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088395357","display_name":"Wenhao Ying","orcid":"https://orcid.org/0000-0001-5992-5444"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Suzhou University of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Ying","raw_affiliation_strings":["Changshu Institute of Technology,School of Computer Science and Engineering","School of Computer Science and Engineering, Changshu Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Changshu Institute of Technology,School of Computer Science and Engineering","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114461972","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0002-1728-9563"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Wang","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Electronics and Information Engineering, Suzhou University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038799336","display_name":"Shengrong Gong","orcid":"https://orcid.org/0000-0003-0266-2422"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Suzhou University of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]},{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengrong Gong","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Electronics and Information Engineering, Suzhou University of Science and Technology","School of Computer Science and Engineering, Changshu Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0252505,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6540","last_page":"6544"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.8877745270729065},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7204210758209229},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.633755624294281},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5667054653167725},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.563147783279419},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5428436398506165},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5234538912773132},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.45017409324645996},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4464249610900879},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34031689167022705},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.336531400680542},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2408968210220337},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19168677926063538},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.12432464957237244},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.09954991936683655},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.08015972375869751}],"concepts":[{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.8877745270729065},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204210758209229},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.633755624294281},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5667054653167725},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.563147783279419},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5428436398506165},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5234538912773132},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.45017409324645996},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4464249610900879},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34031689167022705},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.336531400680542},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2408968210220337},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19168677926063538},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.12432464957237244},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.09954991936683655},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.08015972375869751},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446392","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2447216977","display_name":null,"funder_award_id":"62376041","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2593542410","display_name":null,"funder_award_id":"2021M69236","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G3398038842","display_name":null,"funder_award_id":"61972059","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1491843047","https://openalex.org/W2268617045","https://openalex.org/W3212671125","https://openalex.org/W4225110331","https://openalex.org/W4226345170","https://openalex.org/W4288319859","https://openalex.org/W4298206671","https://openalex.org/W4306819990","https://openalex.org/W4309569224","https://openalex.org/W4317837800","https://openalex.org/W4366507128","https://openalex.org/W4379739787","https://openalex.org/W4381384259","https://openalex.org/W6747473740","https://openalex.org/W6751494529","https://openalex.org/W6764053384","https://openalex.org/W6779142360","https://openalex.org/W6803529815","https://openalex.org/W6810277566","https://openalex.org/W6810450869","https://openalex.org/W6846235960","https://openalex.org/W6846804961","https://openalex.org/W6848761080"],"related_works":["https://openalex.org/W4293202849","https://openalex.org/W1980965563","https://openalex.org/W1489300767","https://openalex.org/W2387995142","https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W2742914308","https://openalex.org/W2330004501","https://openalex.org/W2017089693","https://openalex.org/W2703295919"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1],"learning":[2],"has":[3],"shown":[4],"promise":[5],"in":[6,118],"sample":[7,119],"efficiency":[8,120],"but":[9],"suffers":[10],"from":[11,41],"errors":[12],"accumulated":[13],"during":[14],"multi-step":[15],"model":[16,90],"sampling.":[17],"To":[18],"tackle":[19],"this":[20],"issue,":[21],"we":[22],"propose":[23],"corrected":[24],"data":[25],"aggregation":[26],"for":[27],"model-based":[28,116],"policy":[29],"optimization.":[30],"This":[31],"approach":[32],"involves":[33],"aligning":[34],"simulated":[35,66,97],"trajectories":[36],"with":[37,46],"their":[38],"real":[39,93],"counterparts":[40],"random":[42],"starting":[43],"states":[44],"and":[45,87,95,121],"varying":[47],"sampling":[48],"lengths":[49],"to":[50,60],"create":[51],"paired":[52,80],"real-simulated":[53],"samples.":[54],"The":[55,107],"R-Q":[56,71,100],"discriminator":[57],"is":[58,129],"incorporated":[59],"assess":[61],"the":[62,65,70,84,88,96],"quality":[63],"of":[64],"samples":[67,94,98],"by":[68],"computing":[69],"difference,":[72],"modeled":[73],"as":[74],"a":[75,104],"Gaussian":[76],"distribution":[77],"within":[78],"each":[79],"sample.":[81],"We":[82],"update":[83],"Q":[85],"network":[86],"dynamics":[89],"using":[91],"all":[92],"whose":[99],"difference":[101],"fall":[102],"below":[103],"predefined":[105],"threshold.":[106],"experimental":[108],"results":[109],"demonstrate":[110],"that":[111],"our":[112],"method":[113],"outperforms":[114],"state-of-the-art":[115],"methods":[117],"asymptotic":[122],"performance":[123],"across":[124],"challenging":[125],"tasks.":[126],"Our":[127],"code":[128],"available":[130],"at":[131],"https://github.com/duxin0618/CDA-MBPO.":[132]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
