{"id":"https://openalex.org/W4401417401","doi":"https://doi.org/10.1109/icra57147.2024.10610505","title":"POLITE: Preferences Combined with Highlights in Reinforcement Learning","display_name":"POLITE: Preferences Combined with Highlights in Reinforcement Learning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401417401","doi":"https://doi.org/10.1109/icra57147.2024.10610505"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610505","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610505","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056430836","display_name":"Simon Holk","orcid":"https://orcid.org/0000-0001-5727-8140"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Simon Holk","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084998580","display_name":"Daniel Marta","orcid":"https://orcid.org/0000-0002-3510-5481"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Daniel Marta","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082559019","display_name":"Iolanda Leite","orcid":"https://orcid.org/0000-0002-2212-4325"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Iolanda Leite","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056430836"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":2.7901,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90722358,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2288","last_page":"2295"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.947700023651123,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/politeness","display_name":"Politeness","score":0.9303948879241943},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7612489461898804},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7006930112838745},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4679882228374481},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4387121796607971},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3568243682384491},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33426934480667114},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15134859085083008},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0964040458202362}],"concepts":[{"id":"https://openalex.org/C61123122","wikidata":"https://www.wikidata.org/wiki/Q281287","display_name":"Politeness","level":2,"score":0.9303948879241943},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7612489461898804},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7006930112838745},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4679882228374481},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4387121796607971},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3568243682384491},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33426934480667114},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15134859085083008},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0964040458202362},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610505","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610505","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W1986014385","https://openalex.org/W2151074445","https://openalex.org/W2156869222","https://openalex.org/W2567545459","https://openalex.org/W2597141888","https://openalex.org/W2735318784","https://openalex.org/W2736601468","https://openalex.org/W2790924949","https://openalex.org/W2889990052","https://openalex.org/W2907855292","https://openalex.org/W2947630374","https://openalex.org/W2950872548","https://openalex.org/W2955821606","https://openalex.org/W2962889474","https://openalex.org/W2963009616","https://openalex.org/W2964335674","https://openalex.org/W2972981152","https://openalex.org/W2994446013","https://openalex.org/W3006334608","https://openalex.org/W3038618902","https://openalex.org/W3039563104","https://openalex.org/W3091272913","https://openalex.org/W3133184250","https://openalex.org/W3134834355","https://openalex.org/W3134935089","https://openalex.org/W3138984732","https://openalex.org/W3171912333","https://openalex.org/W3197594072","https://openalex.org/W3213330367","https://openalex.org/W4212774754","https://openalex.org/W4241519124","https://openalex.org/W4281887079","https://openalex.org/W4281963856","https://openalex.org/W4283080321","https://openalex.org/W4283784806","https://openalex.org/W4283788970","https://openalex.org/W4287122174","https://openalex.org/W4297795161","https://openalex.org/W4298179156","https://openalex.org/W4302423442","https://openalex.org/W4310922000","https://openalex.org/W4312376102","https://openalex.org/W4312772252","https://openalex.org/W4360991189","https://openalex.org/W4383108778","https://openalex.org/W4389667165","https://openalex.org/W4392182482","https://openalex.org/W4392633917","https://openalex.org/W6729556111","https://openalex.org/W6731293529","https://openalex.org/W6731334075","https://openalex.org/W6738860160","https://openalex.org/W6739585900","https://openalex.org/W6741002519","https://openalex.org/W6748210908","https://openalex.org/W6748663133","https://openalex.org/W6752818329","https://openalex.org/W6753960536","https://openalex.org/W6761908843","https://openalex.org/W6767425681","https://openalex.org/W6769174716","https://openalex.org/W6774122581","https://openalex.org/W6774583127","https://openalex.org/W6774672031","https://openalex.org/W6780559895","https://openalex.org/W6784131548","https://openalex.org/W6785484245","https://openalex.org/W6786087624","https://openalex.org/W6791684492","https://openalex.org/W6796168157","https://openalex.org/W6838090008","https://openalex.org/W6838587351","https://openalex.org/W6838944978","https://openalex.org/W6843748866","https://openalex.org/W6847353123","https://openalex.org/W6848370696","https://openalex.org/W6891918056"],"related_works":["https://openalex.org/W2394026242","https://openalex.org/W3157531824","https://openalex.org/W627623443","https://openalex.org/W2350380269","https://openalex.org/W4388526518","https://openalex.org/W1821483538","https://openalex.org/W313541083","https://openalex.org/W2988012016","https://openalex.org/W4400665158","https://openalex.org/W2994985942"],"abstract_inverted_index":{"Many":[0],"solutions":[1],"to":[2,19,62,88,129,146,183],"address":[3],"the":[4,39,79,83,106,132,139,148,151,156,170,179],"challenge":[5],"of":[6,49,52,65,74,86,94,135,141,150,158],"robot":[7,186],"learning":[8,27,117],"have":[9],"been":[10],"devised,":[11],"namely":[12],"through":[13],"exploring":[14],"novel":[15,111],"ways":[16],"for":[17,90,188],"humans":[18],"communicate":[20],"complex":[21],"goals":[22],"and":[23,164,173,201],"tasks":[24],"in":[25,78,144,161,192],"reinforcement":[26],"(RL)":[28],"setups.":[29],"One":[30],"way":[31],"that":[32,67],"experienced":[33],"recent":[34],"research":[35],"interest":[36],"directly":[37],"addresses":[38],"problem":[40],"by":[41,137],"considering":[42],"human":[43],"feedback":[44,171,182],"as":[45],"preferences":[46,87],"between":[47],"pairs":[48,93],"trajectories":[50,66,191],"(sequences":[51],"state-action":[53,92],"pairs).":[54],"However,":[55],"when":[56],"simply":[57],"attributing":[58],"a":[59,63,101,115,165,185,193],"single":[60],"preference":[61,116],"pair":[64],"contain":[68],"many":[69],"agglomerated":[70],"steps,":[71],"key":[72],"pieces":[73],"information":[75,97],"are":[76],"lost":[77],"process.":[80],"We":[81,154,176,198],"amplify":[82],"initial":[84],"definition":[85],"account":[89],"highlights:":[91],"relatively":[95],"high":[96],"(high/low":[98],"reward)":[99],"within":[100,114],"preferred":[102],"trajectory.":[103],"To":[104,119],"include":[105],"additional":[107],"information,":[108],"we":[109,122],"design":[110],"regularization":[112],"methods":[113],"framework.":[118],"this":[120],"extent,":[121],"present":[123],"our":[124,159],"method":[125],"which":[126,168],"is":[127],"able":[128],"greatly":[130],"reduce":[131,147],"necessary":[133],"amount":[134],"preferences,":[136],"permitting":[138],"highlighting":[140],"favoured":[142],"trajectories,":[143],"order":[145],"entropy":[149],"credit":[152],"assignment.":[153],"show":[155],"effectiveness":[157],"work":[160],"both":[162],"simulation":[163],"user":[166],"study,":[167],"analyzes":[169],"given":[172],"its":[174],"implications.":[175],"also":[177],"use":[178],"total":[180],"collected":[181],"train":[184],"policy":[187],"socially":[189],"compliant":[190],"simulated":[194],"social":[195],"navigation":[196],"environment.":[197],"release":[199],"code":[200],"video":[202],"examples":[203],"at":[204],"https://sites.google.com/view/rl-polite":[205]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
