{"id":"https://openalex.org/W4309344387","doi":"https://doi.org/10.1109/smc53654.2022.9945333","title":"Advances in Preference-based Reinforcement Learning: A Review","display_name":"Advances in Preference-based Reinforcement Learning: A Review","publication_year":2022,"publication_date":"2022-10-09","ids":{"openalex":"https://openalex.org/W4309344387","doi":"https://doi.org/10.1109/smc53654.2022.9945333"},"language":"en","primary_location":{"id":"doi:10.1109/smc53654.2022.9945333","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53654.2022.9945333","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"review","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2408.11943","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000571607","display_name":"Youssef Abdelkareem","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Youssef Abdelkareem","raw_affiliation_strings":["University of Waterloo,Electrical and Computer Engineering,Waterloo,Canada","Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo,Electrical and Computer Engineering,Waterloo,Canada","institution_ids":["https://openalex.org/I151746483"]},{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075022926","display_name":"Shady Shehata","orcid":"https://orcid.org/0000-0002-3258-6734"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Shady Shehata","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence,Abu Dhabi,UAE","Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE"],"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence,Abu Dhabi,UAE","institution_ids":["https://openalex.org/I4210113480"]},{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067077979","display_name":"Fakhri Karray","orcid":"https://orcid.org/0000-0002-4217-1372"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Fakhri Karray","raw_affiliation_strings":["University of Waterloo,Electrical and Computer Engineering,Waterloo,Canada","Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo,Electrical and Computer Engineering,Waterloo,Canada","institution_ids":["https://openalex.org/I151746483"]},{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5000571607"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":0.3141,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.52138655,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2527","last_page":"2532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.881344199180603},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7754635810852051},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.766740620136261},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6565873026847839},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5415343046188354},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5034112334251404},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.5007166862487793},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4960061013698578},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47610601782798767},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.349467933177948},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3347366154193878}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.881344199180603},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7754635810852051},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.766740620136261},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6565873026847839},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5415343046188354},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5034112334251404},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.5007166862487793},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4960061013698578},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47610601782798767},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.349467933177948},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3347366154193878},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/smc53654.2022.9945333","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53654.2022.9945333","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2408.11943","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.11943","pdf_url":"https://arxiv.org/pdf/2408.11943","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2408.11943","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.11943","pdf_url":"https://arxiv.org/pdf/2408.11943","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4309344387.pdf"},"referenced_works_count":80,"referenced_works":["https://openalex.org/W1583953806","https://openalex.org/W1771410628","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W1987725948","https://openalex.org/W2013784666","https://openalex.org/W2032950725","https://openalex.org/W2039522160","https://openalex.org/W2058475745","https://openalex.org/W2077673559","https://openalex.org/W2089954205","https://openalex.org/W2116671302","https://openalex.org/W2135194391","https://openalex.org/W2149721706","https://openalex.org/W2154023516","https://openalex.org/W2158782408","https://openalex.org/W2289966747","https://openalex.org/W2404342324","https://openalex.org/W2462906003","https://openalex.org/W2614730337","https://openalex.org/W2741672218","https://openalex.org/W2781585732","https://openalex.org/W2781726626","https://openalex.org/W2962901934","https://openalex.org/W2962972512","https://openalex.org/W2963228265","https://openalex.org/W2964043796","https://openalex.org/W2965175271","https://openalex.org/W2974377482","https://openalex.org/W2981344907","https://openalex.org/W2990212132","https://openalex.org/W2996726407","https://openalex.org/W3005680577","https://openalex.org/W3006334608","https://openalex.org/W3006395923","https://openalex.org/W3015662311","https://openalex.org/W3016210511","https://openalex.org/W3034808231","https://openalex.org/W3035599863","https://openalex.org/W3042185737","https://openalex.org/W3131920644","https://openalex.org/W3169375224","https://openalex.org/W3200980294","https://openalex.org/W3216656735","https://openalex.org/W4221163700","https://openalex.org/W4226146935","https://openalex.org/W4287122174","https://openalex.org/W4287674181","https://openalex.org/W4289010281","https://openalex.org/W6633965395","https://openalex.org/W6638018090","https://openalex.org/W6638088447","https://openalex.org/W6677285668","https://openalex.org/W6691531235","https://openalex.org/W6692846177","https://openalex.org/W6696125644","https://openalex.org/W6713347985","https://openalex.org/W6718836005","https://openalex.org/W6737581885","https://openalex.org/W6747387971","https://openalex.org/W6747473740","https://openalex.org/W6748210908","https://openalex.org/W6757991398","https://openalex.org/W6763285099","https://openalex.org/W6766451708","https://openalex.org/W6769596995","https://openalex.org/W6771787661","https://openalex.org/W6773378895","https://openalex.org/W6774149402","https://openalex.org/W6774314701","https://openalex.org/W6776373295","https://openalex.org/W6776565550","https://openalex.org/W6779523381","https://openalex.org/W6782465632","https://openalex.org/W6785423136","https://openalex.org/W6796168157","https://openalex.org/W6799431981","https://openalex.org/W6801858553","https://openalex.org/W6810044100","https://openalex.org/W6843699536"],"related_works":["https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2284465472","https://openalex.org/W2291782699","https://openalex.org/W1993948687","https://openalex.org/W2000169967","https://openalex.org/W2112883198"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"algorithms":[3],"suffer":[4],"from":[5,36],"the":[6,16,21,37,74,80,94,102,117,120,124],"dependency":[7],"on":[8],"accurately":[9],"engineered":[10],"reward":[11],"functions":[12],"to":[13,19,44,72],"properly":[14],"guide":[15],"learning":[17,26],"agents":[18],"do":[20],"required":[22],"tasks.":[23,112],"Preference-based":[24],"reinforcement":[25],"(PbRL)":[27],"addresses":[28],"that":[29,78],"by":[30],"utilizing":[31],"human":[32],"preferences":[33],"as":[34],"feedback":[35],"experts":[38],"instead":[39],"of":[40,84,93,119],"numeric":[41],"rewards.":[42],"Due":[43],"its":[45,106],"promising":[46],"advantage":[47],"over":[48,116],"traditional":[49],"RL,":[50],"PbRL":[51,70],"has":[52],"gained":[53],"more":[54],"focus":[55],"in":[56,101,109],"recent":[57,107],"years":[58],"with":[59],"many":[60],"significant":[61],"advances.":[62],"In":[63,86],"this":[64],"survey,":[65],"we":[66,88,114],"present":[67],"a":[68,90],"unified":[69],"framework":[71],"include":[73],"newly":[75],"emerging":[76],"approaches":[77,122],"improve":[79],"scalability":[81],"and":[82,97,123],"efficiency":[83],"PbRL.":[85],"addition,":[87],"give":[89],"detailed":[91],"overview":[92],"theoretical":[95],"guarantees":[96],"benchmarking":[98],"work":[99],"done":[100],"field,":[103],"while":[104],"presenting":[105],"applications":[108],"complex":[110],"real-world":[111],"Lastly,":[113],"go":[115],"limitations":[118],"current":[121],"proposed":[125],"future":[126],"research":[127],"directions.":[128]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
