{"id":"https://openalex.org/W3092253573","doi":"https://doi.org/10.1109/tcyb.2020.3023127","title":"Inference-Based Posteriori Parameter Distribution Optimization","display_name":"Inference-Based Posteriori Parameter Distribution Optimization","publication_year":2020,"publication_date":"2020-10-07","ids":{"openalex":"https://openalex.org/W3092253573","doi":"https://doi.org/10.1109/tcyb.2020.3023127","mag":"3092253573","pmid":"https://pubmed.ncbi.nlm.nih.gov/33027029"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2020.3023127","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2020.3023127","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108064895","display_name":"Xuesong Wang","orcid":"https://orcid.org/0000-0002-5327-1088"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuesong Wang","raw_affiliation_strings":["Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China","School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460614","display_name":"Tianyi Li","orcid":"https://orcid.org/0000-0002-9423-6369"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyi Li","raw_affiliation_strings":["Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China","School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091364297","display_name":"Yuhu Cheng","orcid":"https://orcid.org/0000-0003-2022-9999"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhu Cheng","raw_affiliation_strings":["Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"Xuzhou Key Laboratory of Artificial Intelligence and Big Data, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100643265","display_name":"C. L. Philip Chen","orcid":"https://orcid.org/0000-0001-5451-7230"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]},{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["CN","MO"],"is_corresponding":false,"raw_author_name":"C. L. Philip Chen","raw_affiliation_strings":["Faculty of Science and Technology, University of Macau, Macau, China","School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, University of Macau, Macau, China","institution_ids":["https://openalex.org/I204512498"]},{"raw_affiliation_string":"School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5108064895"],"corresponding_institution_ids":["https://openalex.org/I25757504"],"apc_list":null,"apc_paid":null,"fwci":1.0605,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82358507,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"52","issue":"5","first_page":"3006","last_page":"3017"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6348732113838196},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6156753897666931},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5839530825614929},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5187845230102539},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.47021248936653137},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4473228454589844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3785780072212219},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3587349057197571},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.31039708852767944}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6348732113838196},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6156753897666931},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5839530825614929},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5187845230102539},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.47021248936653137},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4473228454589844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3785780072212219},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3587349057197571},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31039708852767944},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012107","descriptor_name":"Research Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012107","descriptor_name":"Research Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012107","descriptor_name":"Research Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2020.3023127","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2020.3023127","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:33027029","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33027029","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5197980865","display_name":null,"funder_award_id":"2019YFA0706200","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5436807578","display_name":null,"funder_award_id":"2019YFB1703600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5750115239","display_name":null,"funder_award_id":"U1813203","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6792490895","display_name":null,"funder_award_id":"61976215","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7031480287","display_name":null,"funder_award_id":"61772532","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W2119717200","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2462102501","https://openalex.org/W2586702902","https://openalex.org/W2736601468","https://openalex.org/W2765454702","https://openalex.org/W2767696060","https://openalex.org/W2780799698","https://openalex.org/W2785556016","https://openalex.org/W2787938642","https://openalex.org/W2792645523","https://openalex.org/W2798791651","https://openalex.org/W2803620531","https://openalex.org/W2887063112","https://openalex.org/W2895115048","https://openalex.org/W2897975793","https://openalex.org/W2908925809","https://openalex.org/W2945935987","https://openalex.org/W2954033048","https://openalex.org/W2963523627","https://openalex.org/W2964030969","https://openalex.org/W2966349401","https://openalex.org/W2970007912","https://openalex.org/W2970999977","https://openalex.org/W4214717370","https://openalex.org/W6640963894","https://openalex.org/W6682849425","https://openalex.org/W6683300800","https://openalex.org/W6684205842","https://openalex.org/W6692846177","https://openalex.org/W6715102896","https://openalex.org/W6716474083","https://openalex.org/W6729956949","https://openalex.org/W6730111887","https://openalex.org/W6730641667","https://openalex.org/W6731334075","https://openalex.org/W6736057607","https://openalex.org/W6739193204","https://openalex.org/W6740092555","https://openalex.org/W6747473740","https://openalex.org/W6751500528","https://openalex.org/W6764214684"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W4388311650","https://openalex.org/W5922282","https://openalex.org/W1974056099","https://openalex.org/W4245343541","https://openalex.org/W2386077341","https://openalex.org/W563589758","https://openalex.org/W62490179","https://openalex.org/W2954004777","https://openalex.org/W2951102138"],"abstract_inverted_index":{"Encouraging":[0],"the":[1,14,35,39,45,85,89,98,117,120,135,140,145,172,180,226,246],"agent":[2],"to":[3,33,54,66,115,128,171],"explore":[4,243],"has":[5],"always":[6],"been":[7],"an":[8,30,168],"important":[9],"and":[10,61,68,103,134,186,229,236,253],"challenging":[11],"topic":[12],"in":[13,139,245],"field":[15],"of":[16,38,48,87,93,106,119,175],"reinforcement":[17],"learning":[18,63],"(RL).":[19],"Distributional":[20],"representation":[21,46],"for":[22,101,159],"network":[23,49,141,146,163],"parameters":[24,50],"or":[25],"value":[26,121,130],"functions":[27,100,131],"is":[28,82,191],"usually":[29],"effective":[31],"way":[32],"improve":[34,206],"exploration":[36],"ability":[37],"RL":[40,194],"agent.":[41],"However,":[42],"directly":[43],"changing":[44],"form":[47],"from":[51,162],"fixed":[52,184],"values":[53,185],"function":[55,170],"distributions":[56],"may":[57],"cause":[58],"algorithm":[59,81,196,255],"instability":[60],"low":[62],"inefficiency.":[64],"Therefore,":[65],"accelerate":[67],"stabilize":[69],"parameter":[70,77,107,142,147,164,176],"distribution":[71,78,108,148,165],"learning,":[72],"a":[73,156,192],"novel":[74],"inference-based":[75],"posteriori":[76],"optimization":[79,109],"(IPPDO)":[80],"proposed.":[83],"From":[84],"perspective":[86],"solving":[88],"evidence":[90],"lower":[91],"bound":[92],"probability,":[94],"we,":[95],"respectively,":[96],"design":[97,155],"objective":[99],"continuous-action":[102,235],"discrete-action":[104,237],"tasks":[105,238],"based":[110,197],"on":[111,198,225,233],"inference.":[112],"In":[113],"order":[114],"alleviate":[116],"overestimation":[118],"function,":[122],"we":[123,154],"use":[124],"multiple":[125],"neural":[126],"networks":[127],"estimate":[129,137],"with":[132,220],"Retrace,":[133],"smaller":[136],"participates":[138],"update;":[143],"thus,":[144],"can":[149,204,242],"be":[150],"learned.":[151],"After":[152],"that,":[153],"method":[157],"used":[158],"sampling":[160],"weight":[161],"by":[166,209],"adding":[167],"activation":[169],"standard":[173],"deviation":[174],"distribution,":[177],"which":[178,200],"achieves":[179],"adaptive":[181],"adjustment":[182],"between":[183],"distribution.":[187],"Furthermore,":[188],"this":[189],"IPPDO":[190,219,241],"deep":[193],"(DRL)":[195],"off-policy,":[199],"means":[201],"that":[202,240],"it":[203],"effectively":[205],"data":[207],"efficiency":[208],"using":[210],"off-policy":[211],"techniques":[212],"such":[213],"as":[214],"experience":[215],"replay.":[216],"We":[217],"compare":[218],"other":[221],"prevailing":[222],"DRL":[223],"algorithms":[224],"OpenAI":[227],"Gym":[228],"MuJoCo":[230],"platforms.":[231],"Experiments":[232],"both":[234],"indicate":[239],"more":[244],"action":[247],"space,":[248],"get":[249],"higher":[250],"rewards":[251],"faster,":[252],"ensure":[254],"stability.":[256]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
