{"id":"https://openalex.org/W4392182482","doi":"https://doi.org/10.1145/3610977.3634970","title":"PREDILECT: Preferences Delineated with Zero-Shot Language-based Reasoning in Reinforcement Learning","display_name":"PREDILECT: Preferences Delineated with Zero-Shot Language-based Reasoning in Reinforcement Learning","publication_year":2024,"publication_date":"2024-03-10","ids":{"openalex":"https://openalex.org/W4392182482","doi":"https://doi.org/10.1145/3610977.3634970"},"language":"en","primary_location":{"id":"doi:10.1145/3610977.3634970","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3610977.3634970","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3610977.3634970","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3610977.3634970","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056430836","display_name":"Simon Holk","orcid":"https://orcid.org/0000-0001-5727-8140"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Simon Holk","raw_affiliation_strings":["KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":"https://orcid.org/0000-0001-5727-8140","affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084998580","display_name":"Daniel Marta","orcid":"https://orcid.org/0000-0002-3510-5481"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Daniel Marta","raw_affiliation_strings":["KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":"https://orcid.org/0000-0002-3510-5481","affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082559019","display_name":"Iolanda Leite","orcid":"https://orcid.org/0000-0002-2212-4325"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Iolanda Leite","raw_affiliation_strings":["KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":"https://orcid.org/0000-0002-2212-4325","affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056430836"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":3.3114,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.92669184,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"259","last_page":"268"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.8447192907333374},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8216522932052612},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7672866582870483},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6887847185134888},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5714415311813354},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5090951323509216},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.4636674225330353},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4519460201263428},{"id":"https://openalex.org/keywords/one-shot","display_name":"One shot","score":0.4396897852420807},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.4211135506629944},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4111711382865906},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3847317695617676},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07542797923088074}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.8447192907333374},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8216522932052612},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7672866582870483},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6887847185134888},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5714415311813354},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5090951323509216},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.4636674225330353},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4519460201263428},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.4396897852420807},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.4211135506629944},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4111711382865906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3847317695617676},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07542797923088074},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3610977.3634970","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3610977.3634970","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3610977.3634970","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2402.15420","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.15420","pdf_url":"https://arxiv.org/pdf/2402.15420","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3610977.3634970","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3610977.3634970","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3610977.3634970","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392182482.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W2110074961","https://openalex.org/W2151074445","https://openalex.org/W2156869222","https://openalex.org/W2167052694","https://openalex.org/W2187105870","https://openalex.org/W2567545459","https://openalex.org/W2597141888","https://openalex.org/W2622408375","https://openalex.org/W2735318784","https://openalex.org/W2763110165","https://openalex.org/W2785324569","https://openalex.org/W2790924949","https://openalex.org/W2889990052","https://openalex.org/W2963785012","https://openalex.org/W2964335674","https://openalex.org/W2991276239","https://openalex.org/W2997131443","https://openalex.org/W2999620783","https://openalex.org/W3006303816","https://openalex.org/W3006334608","https://openalex.org/W3035160371","https://openalex.org/W3035304632","https://openalex.org/W3039563104","https://openalex.org/W3103105396","https://openalex.org/W3128663629","https://openalex.org/W3171912333","https://openalex.org/W3177174258","https://openalex.org/W4212774754","https://openalex.org/W4221143046","https://openalex.org/W4281558089","https://openalex.org/W4283788970","https://openalex.org/W4287113019","https://openalex.org/W4298427539"],"related_works":["https://openalex.org/W2768698792","https://openalex.org/W2497720472","https://openalex.org/W4292659306","https://openalex.org/W3044321615","https://openalex.org/W2806221744","https://openalex.org/W2326937258","https://openalex.org/W394267150","https://openalex.org/W2773965352","https://openalex.org/W4294892107","https://openalex.org/W2357748469"],"abstract_inverted_index":{"Preference-based":[0],"reinforcement":[1],"learning":[2,103],"(RL)":[3],"has":[4],"emerged":[5],"as":[6],"a":[7,16,80,125,129,134,138,163,170],"new":[8],"field":[9],"in":[10,19,124,169],"robot":[11,21,164],"learning,":[12],"where":[13],"humans":[14,41],"play":[15],"pivotal":[17],"role":[18],"shaping":[20],"behavior":[22],"by":[23,56,91,148],"expressing":[24],"preferences":[25,66],"on":[26,165],"different":[27],"sequences":[28],"of":[29,46,79,145,179],"state-action":[30,110],"pairs.":[31],"However,":[32],"formulating":[33],"realistic":[34],"policies":[35,182],"for":[36],"robots":[37],"demands":[38],"responses":[39],"from":[40,87,128],"to":[42,63,85,105,120,161],"an":[43],"extensive":[44],"array":[45],"queries.":[47],"In":[48,132],"this":[49],"work,":[50],"we":[51,74,99,141],"approach":[52],"the":[53,58,76,88,95,101,121,143,150,156,180],"sample-efficiency":[54],"challenge":[55],"expanding":[57],"information":[59,116],"collected":[60,159],"per":[61],"query":[62,97],"contain":[64,106,113],"both":[65,133],"and":[67,117,137,152],"optional":[68],"text":[69,89],"prompting.":[70],"To":[71,93],"accomplish":[72],"this,":[73],"leverage":[75],"zero-shot":[77,126],"capabilities":[78],"large":[81],"language":[82],"model":[83],"(LLM)":[84],"reason":[86],"provided":[90],"humans.":[92],"accommodate":[94],"additional":[96],"information,":[98],"reformulate":[100],"reward":[102],"objectives":[104],"flexible":[107],"highlights":[108],"--":[109],"pairs":[111],"that":[112],"relatively":[114],"high":[115],"are":[118],"related":[119],"features":[122],"processed":[123],"fashion":[127],"pretrained":[130],"LLM.":[131],"simulated":[135,171],"scenario":[136],"user":[139],"study,":[140],"reveal":[142],"effectiveness":[144],"our":[146],"work":[147],"analyzing":[149],"feedback":[151,158],"its":[153],"implications.":[154],"Additionally,":[155],"collective":[157],"serves":[160],"train":[162],"socially":[166],"compliant":[167],"trajectories":[168],"social":[172],"navigation":[173],"landscape.":[174],"We":[175],"provide":[176],"video":[177],"examples":[178],"trained":[181],"at":[183],"https://sites.google.com/view/rl-predilect":[184]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
