{"id":"https://openalex.org/W4390099663","doi":"https://doi.org/10.1109/robio58561.2023.10355039","title":"METREE: Max-Entropy Exploration with Random Encoding for Efficient RL with Human Preferences","display_name":"METREE: Max-Entropy Exploration with Random Encoding for Efficient RL with Human Preferences","publication_year":2023,"publication_date":"2023-12-04","ids":{"openalex":"https://openalex.org/W4390099663","doi":"https://doi.org/10.1109/robio58561.2023.10355039"},"language":"en","primary_location":{"id":"doi:10.1109/robio58561.2023.10355039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio58561.2023.10355039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093559067","display_name":"Isabel Y.N Guan","orcid":"https://orcid.org/0009-0003-0141-3165"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Isabel Y.N Guan","raw_affiliation_strings":["Peking University,School of Software and Microelectronics,Beijing,China,100871"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Software and Microelectronics,Beijing,China,100871","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100352274","display_name":"Xin Liu","orcid":"https://orcid.org/0000-0002-8272-9553"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Liu","raw_affiliation_strings":["Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052547218","display_name":"Gary Zhang","orcid":"https://orcid.org/0000-0002-0600-5051"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Gary Zhang","raw_affiliation_strings":["Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore","School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113078573","display_name":"Estella Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Estella Zhao","raw_affiliation_strings":["Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075727192","display_name":"Zhenzhong Jia","orcid":"https://orcid.org/0000-0001-5924-5502"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenzhong Jia","raw_affiliation_strings":["Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5093559067"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.1789,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60055995,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9549000263214111,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9028854370117188},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7202181816101074},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6784933805465698},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.6316002607345581},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6110540628433228},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5055854320526123},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4966467022895813},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.45878034830093384},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4399249851703644},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.20447638630867004}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9028854370117188},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7202181816101074},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6784933805465698},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.6316002607345581},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6110540628433228},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5055854320526123},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4966467022895813},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.45878034830093384},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4399249851703644},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.20447638630867004},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/robio58561.2023.10355039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio58561.2023.10355039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-141817","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-141817","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W770013183","https://openalex.org/W1945123189","https://openalex.org/W2016765487","https://openalex.org/W2111935653","https://openalex.org/W2143277772","https://openalex.org/W2145339207","https://openalex.org/W2156869222","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2460299708","https://openalex.org/W2462906003","https://openalex.org/W2529658650","https://openalex.org/W2530944449","https://openalex.org/W2539402368","https://openalex.org/W2539881804","https://openalex.org/W2575705757","https://openalex.org/W2604272474","https://openalex.org/W2790355818","https://openalex.org/W2913668833","https://openalex.org/W2963037989","https://openalex.org/W2963669336","https://openalex.org/W2963684088","https://openalex.org/W2963713397","https://openalex.org/W2964112890","https://openalex.org/W2973229164","https://openalex.org/W2990775381","https://openalex.org/W3039563104","https://openalex.org/W3099442372","https://openalex.org/W3133533407","https://openalex.org/W4242937284","https://openalex.org/W4300198501","https://openalex.org/W4321392130","https://openalex.org/W6677285668","https://openalex.org/W6685352114","https://openalex.org/W6718836005","https://openalex.org/W6720501231","https://openalex.org/W6728367041","https://openalex.org/W6736057607","https://openalex.org/W6739585900","https://openalex.org/W6751955673","https://openalex.org/W6762863188","https://openalex.org/W6770547988","https://openalex.org/W6780559895","https://openalex.org/W6796168157","https://openalex.org/W6838843634"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W1950940422","https://openalex.org/W4283822356","https://openalex.org/W2129146436","https://openalex.org/W2032507829","https://openalex.org/W2147282173"],"abstract_inverted_index":{"In":[0,33],"recent":[1],"years,":[2],"reinforcement":[3,22],"learning":[4,23],"has":[5],"achieved":[6],"significant":[7],"advances":[8],"in":[9,21],"practical":[10],"domains":[11],"such":[12],"as":[13],"robotics.":[14],"However,":[15],"conveying":[16],"intricate":[17],"objectives":[18],"to":[19],"agents":[20],"(RL)":[24],"remains":[25],"challenging,":[26],"often":[27],"necessitating":[28],"detailed":[29],"reward":[30,67],"function":[31],"design.":[32],"this":[34],"study,":[35],"we":[36],"introduce":[37],"an":[38],"innovative":[39],"approach,":[40],"MEETRE,":[41],"which":[42],"integrates":[43],"max-entropy":[44],"exploration":[45],"strategies":[46],"with":[47],"random":[48],"encoders.":[49],"This":[50],"offers":[51],"a":[52],"streamlined":[53],"and":[54],"efficient":[55],"solution":[56],"for":[57,64,74,87],"human-involved":[58],"preference-based":[59],"RL":[60],"without":[61],"the":[62,72,81],"need":[63,73],"meticulously":[65],"designed":[66],"functions.":[68],"Furthermore,":[69],"MEETRE":[70],"sidesteps":[71],"additional":[75],"models":[76],"or":[77],"representation":[78],"learning,":[79],"leveraging":[80],"power":[82],"of":[83],"randomly":[84],"initialized":[85],"encoders":[86],"effective":[88],"exploration.":[89]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
