{"id":"https://openalex.org/W4392903507","doi":"https://doi.org/10.1109/icassp48485.2024.10446392","title":"CDA-MBPO:Corrected Data Aggregation for Model-Based Policy Optimization","display_name":"CDA-MBPO:Corrected Data Aggregation for Model-Based Policy Optimization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903507","doi":"https://doi.org/10.1109/icassp48485.2024.10446392"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446392","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054600540","display_name":"Du Xin","orcid":"https://orcid.org/0000-0002-5348-4438"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xin Du","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Electronics and Information Engineering, Suzhou University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101902909","display_name":"Shan Zhong","orcid":"https://orcid.org/0000-0003-0034-6952"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shan Zhong","raw_affiliation_strings":["Changshu Institute of Technology,School of Computer Science and Engineering","School of Computer Science and Engineering, Changshu Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Changshu Institute of Technology,School of Computer Science and Engineering","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088395357","display_name":"Wenhao Ying","orcid":"https://orcid.org/0000-0001-5992-5444"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Ying","raw_affiliation_strings":["Changshu Institute of Technology,School of Computer Science and Engineering","School of Computer Science and Engineering, Changshu Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Changshu Institute of Technology,School of Computer Science and Engineering","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114461972","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0002-1728-9563"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Wang","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Electronics and Information Engineering, Suzhou University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038799336","display_name":"Shengrong Gong","orcid":"https://orcid.org/0000-0003-0266-2422"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]},{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengrong Gong","raw_affiliation_strings":["Suzhou University of Science and Technology,School of Electronics and Information Engineering","School of Computer Science and Engineering, Changshu Institute of Technology","School of Electronics and Information Engineering, Suzhou University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Suzhou University of Science and Technology,School of Electronics and Information Engineering","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Electronics and Information Engineering, Suzhou University of Science and Technology","institution_ids":["https://openalex.org/I308837"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5054600540"],"corresponding_institution_ids":["https://openalex.org/I308837"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02597746,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6540","last_page":"6544"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.8877745270729065},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7204210758209229},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.633755624294281},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5667054653167725},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.563147783279419},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5428436398506165},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5234538912773132},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.45017409324645996},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4464249610900879},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34031689167022705},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.336531400680542},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2408968210220337},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19168677926063538},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.12432464957237244},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.09954991936683655},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.08015972375869751}],"concepts":[{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.8877745270729065},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204210758209229},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.633755624294281},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5667054653167725},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.563147783279419},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5428436398506165},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5234538912773132},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.45017409324645996},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4464249610900879},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34031689167022705},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.336531400680542},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2408968210220337},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19168677926063538},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.12432464957237244},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.09954991936683655},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.08015972375869751},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446392","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2082826544","display_name":null,"funder_award_id":"Postdoctoral","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2104653401","display_name":null,"funder_award_id":"2021M","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G2447216977","display_name":null,"funder_award_id":"62376041","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2593542410","display_name":null,"funder_award_id":"2021M69236","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3398038842","display_name":null,"funder_award_id":"61972059","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1491843047","https://openalex.org/W2268617045","https://openalex.org/W3212671125","https://openalex.org/W4225110331","https://openalex.org/W4226345170","https://openalex.org/W4288319859","https://openalex.org/W4298206671","https://openalex.org/W4306819990","https://openalex.org/W4309569224","https://openalex.org/W4317837800","https://openalex.org/W4366507128","https://openalex.org/W4379739787","https://openalex.org/W4381384259","https://openalex.org/W6747473740","https://openalex.org/W6751494529","https://openalex.org/W6764053384","https://openalex.org/W6779142360","https://openalex.org/W6803529815","https://openalex.org/W6810277566","https://openalex.org/W6810450869","https://openalex.org/W6846235960","https://openalex.org/W6846804961","https://openalex.org/W6848761080"],"related_works":["https://openalex.org/W4293202849","https://openalex.org/W1980965563","https://openalex.org/W1489300767","https://openalex.org/W2387995142","https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W2742914308","https://openalex.org/W2330004501","https://openalex.org/W2017089693","https://openalex.org/W2703295919"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1],"learning":[2],"has":[3],"shown":[4],"promise":[5],"in":[6,118],"sample":[7,119],"efficiency":[8,120],"but":[9],"suffers":[10],"from":[11,41],"errors":[12],"accumulated":[13],"during":[14],"multi-step":[15],"model":[16,90],"sampling.":[17],"To":[18],"tackle":[19],"this":[20],"issue,":[21],"we":[22],"propose":[23],"corrected":[24],"data":[25],"aggregation":[26],"for":[27],"model-based":[28,116],"policy":[29],"optimization.":[30],"This":[31],"approach":[32],"involves":[33],"aligning":[34],"simulated":[35,66,97],"trajectories":[36],"with":[37,46],"their":[38],"real":[39,93],"counterparts":[40],"random":[42],"starting":[43],"states":[44],"and":[45,87,95,121],"varying":[47],"sampling":[48],"lengths":[49],"to":[50,60],"create":[51],"paired":[52,80],"real-simulated":[53],"samples.":[54],"The":[55,107],"R-Q":[56,71,100],"discriminator":[57],"is":[58,129],"incorporated":[59],"assess":[61],"the":[62,65,70,84,88,96],"quality":[63],"of":[64],"samples":[67,94,98],"by":[68],"computing":[69],"difference,":[72],"modeled":[73],"as":[74],"a":[75,104],"Gaussian":[76],"distribution":[77],"within":[78],"each":[79],"sample.":[81],"We":[82],"update":[83],"Q":[85],"network":[86],"dynamics":[89],"using":[91],"all":[92],"whose":[99],"difference":[101],"fall":[102],"below":[103],"predefined":[105],"threshold.":[106],"experimental":[108],"results":[109],"demonstrate":[110],"that":[111],"our":[112],"method":[113],"outperforms":[114],"state-of-the-art":[115],"methods":[117],"asymptotic":[122],"performance":[123],"across":[124],"challenging":[125],"tasks.":[126],"Our":[127],"code":[128],"available":[130],"at":[131],"https://github.com/duxin0618/CDA-MBPO.":[132]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
