{"id":"https://openalex.org/W4402352352","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650719","title":"RLBOF: Reinforcement Learning from Bayesian Optimization Feedback","display_name":"RLBOF: Reinforcement Learning from Bayesian Optimization Feedback","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402352352","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650719"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10650719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113006286","display_name":"Hailong Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hailong Huang","raw_affiliation_strings":["Zhejiang University,School of Sofware Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Sofware Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043803804","display_name":"Xiubo Liang","orcid":"https://orcid.org/0000-0002-4749-5552"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiubo Liang","raw_affiliation_strings":["Zhejiang University,School of Sofware Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Sofware Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027143753","display_name":"Quanwei Zhang","orcid":"https://orcid.org/0009-0007-2121-2159"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanwei Zhang","raw_affiliation_strings":["Zhejiang University,School of Sofware Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Sofware Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396652","display_name":"Hongzhi Wang","orcid":"https://orcid.org/0000-0002-7521-2871"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongzhi Wang","raw_affiliation_strings":["Zhejiang University,School of Sofware Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Sofware Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100416430","display_name":"Xiangdong Li","orcid":"https://orcid.org/0000-0001-7778-6230"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangdong Li","raw_affiliation_strings":["Zhejiang University,College of Computer Science and Technology,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,College of Computer Science and Technology,Hangzhou,China","institution_ids":["https://openalex.org/I168879160"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113006286"],"corresponding_institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.3637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66502001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8545897006988525},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7206884026527405},{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.7154006958007812},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.5893531441688538},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5252758264541626},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.501988410949707},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45778122544288635},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12405064702033997},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.04990682005882263}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8545897006988525},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7206884026527405},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.7154006958007812},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5893531441688538},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5252758264541626},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.501988410949707},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45778122544288635},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12405064702033997},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.04990682005882263}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10650719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W147998453","https://openalex.org/W1596981350","https://openalex.org/W1994005439","https://openalex.org/W2099201756","https://openalex.org/W2107726111","https://openalex.org/W2131241448","https://openalex.org/W2192203593","https://openalex.org/W2209913494","https://openalex.org/W2556372419","https://openalex.org/W2732547613","https://openalex.org/W2736601468","https://openalex.org/W2780562800","https://openalex.org/W2891146651","https://openalex.org/W2893995718","https://openalex.org/W2963557251","https://openalex.org/W3048118356","https://openalex.org/W3118300017","https://openalex.org/W3124229194","https://openalex.org/W3126300335","https://openalex.org/W3130744522","https://openalex.org/W3210568174","https://openalex.org/W3213051418","https://openalex.org/W4226412614","https://openalex.org/W4252003911","https://openalex.org/W4285070053","https://openalex.org/W4287674181","https://openalex.org/W4288375870","https://openalex.org/W4289761856","https://openalex.org/W4320853913","https://openalex.org/W4324304837","https://openalex.org/W4376311940","https://openalex.org/W4376632920","https://openalex.org/W4378501141","https://openalex.org/W4385245566","https://openalex.org/W4386730022","https://openalex.org/W4387459858","https://openalex.org/W6603190504","https://openalex.org/W6634413486","https://openalex.org/W6638018090","https://openalex.org/W6638209102","https://openalex.org/W6675200109","https://openalex.org/W6678911119","https://openalex.org/W6692846177","https://openalex.org/W6730269975","https://openalex.org/W6741002519","https://openalex.org/W6752040014","https://openalex.org/W6753146606","https://openalex.org/W6754399969","https://openalex.org/W6755463424","https://openalex.org/W6758711939","https://openalex.org/W6764988152","https://openalex.org/W6769040946","https://openalex.org/W6781903740","https://openalex.org/W6782465632","https://openalex.org/W6796440240","https://openalex.org/W6797021028","https://openalex.org/W6803697892","https://openalex.org/W6839443103","https://openalex.org/W6849750409","https://openalex.org/W6852764809","https://openalex.org/W6853009676","https://openalex.org/W6853537668"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W3199608561","https://openalex.org/W4401858220","https://openalex.org/W4382049207"],"abstract_inverted_index":{"Bayesian":[0,71,99,147,153,171,202],"Optimization":[1,18,165],"is":[2,20],"a":[3,21,84,135],"powerful":[4],"technique":[5],"employed":[6],"to":[7,25,92,107,150,166],"address":[8],"black-box":[9],"optimization":[10,100,148,154],"problems,":[11],"finding":[12],"applications":[13],"in":[14,43,49,76,126,201],"various":[15,188],"domains.":[16],"Meta-Bayesian":[17],"(Meta-BO)":[19],"specific":[22],"approach":[23,86,132,179],"designed":[24],"improve":[26],"data":[27,57],"efficiency":[28],"by":[29],"leveraging":[30,142],"information":[31],"from":[32,58,119,145,170],"related":[33,59],"tasks.":[34,60,101],"In":[35],"recent":[36],"years,":[37],"there":[38],"has":[39,180],"been":[40],"notable":[41],"progress":[42],"the":[44,77,89,95,104,114,162,174,196,205],"field":[45],"of":[46,70,79,211],"Meta-BO,":[47],"particularly":[48],"surrogate":[50,90,105,138,176],"models":[51,122,186,197],"and":[52,140,187,207],"acquisition":[53,96],"functions":[54],"that":[55,87,123],"utilize":[56,167],"However,":[61],"these":[62,80],"advancements":[63,200],"have":[64,160],"predominantly":[65],"focused":[66],"on":[67],"singular":[68],"aspects":[69],"optimization,":[72,172,203],"leaving":[73],"untapped":[74],"potential":[75],"integration":[78],"two":[81],"aspects.We":[82],"propose":[83],"novel":[85],"enables":[88],"model":[91,106,139],"effectively":[93,112],"integrate":[94],"function":[97,110],"for":[98],"This":[102],"makes":[103],"transcend":[108],"mere":[109],"approximation,":[111],"addressing":[113],"aforementioned":[115],"problem.":[116],"Taking":[117],"inspiration":[118],"large":[120],"language":[121],"receive":[124],"feedback":[125,143,168],"actual":[127],"human":[128],"dialogue":[129],"tasks,":[130],"our":[131,212],"involves":[133],"pre-training":[134],"neural":[136],"process":[137],"subsequently":[141],"obtained":[144],"real":[146],"scenarios":[149],"enhance":[151],"its":[152],"capability.":[155],"To":[156],"achieve":[157],"this,":[158],"we":[159],"extended":[161],"Proximal":[163],"Policy":[164],"derived":[169],"incentivizing":[173],"pre-trained":[175],"model.":[177],"Our":[178],"undergone":[181],"thorough":[182],"evaluation":[183],"across":[184],"diverse":[185],"benchmark":[189],"functions.":[190],"Remarkably,":[191],"even":[192],"with":[193],"minimal":[194],"incentives,":[195],"exhibit":[198],"significant":[199],"highlighting":[204],"effectiveness":[206],"robust":[208],"generalization":[209],"ability":[210],"proposed":[213],"method.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
