{"id":"https://openalex.org/W4401414269","doi":"https://doi.org/10.1109/icra57147.2024.10610534","title":"SEQUEL: Semi-Supervised Preference-based RL with Query Synthesis via Latent Interpolation","display_name":"SEQUEL: Semi-Supervised Preference-based RL with Query Synthesis via Latent Interpolation","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401414269","doi":"https://doi.org/10.1109/icra57147.2024.10610534"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610534","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084998580","display_name":"Daniel Marta","orcid":"https://orcid.org/0000-0002-3510-5481"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Daniel Marta","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056430836","display_name":"Simon Holk","orcid":"https://orcid.org/0000-0001-5727-8140"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Simon Holk","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005479279","display_name":"Christian Pek","orcid":"https://orcid.org/0000-0001-7461-920X"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Christian Pek","raw_affiliation_strings":["TU Delft,Dept. of Cognitive Robotics,Delft,The Netherlands,2628 CD"],"affiliations":[{"raw_affiliation_string":"TU Delft,Dept. of Cognitive Robotics,Delft,The Netherlands,2628 CD","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082559019","display_name":"Iolanda Leite","orcid":"https://orcid.org/0000-0002-2212-4325"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Iolanda Leite","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084998580"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":1.0878,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80870704,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"9585","last_page":"9592"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6716939210891724},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.5793455839157104},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5319440960884094},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.483602911233902},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3202889561653137},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1405346691608429},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.128930926322937}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6716939210891724},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.5793455839157104},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5319440960884094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.483602911233902},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3202889561653137},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1405346691608429},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.128930926322937},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610534","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":90,"referenced_works":["https://openalex.org/W122021961","https://openalex.org/W1583953806","https://openalex.org/W2110074961","https://openalex.org/W2116671302","https://openalex.org/W2162531249","https://openalex.org/W2293363371","https://openalex.org/W2462906003","https://openalex.org/W2560674852","https://openalex.org/W2567545459","https://openalex.org/W2606712314","https://openalex.org/W2735318784","https://openalex.org/W2736601468","https://openalex.org/W2763110165","https://openalex.org/W2889987506","https://openalex.org/W2890208753","https://openalex.org/W2947630374","https://openalex.org/W2948978827","https://openalex.org/W2962369866","https://openalex.org/W2962808998","https://openalex.org/W2963223306","https://openalex.org/W2963785012","https://openalex.org/W2995709298","https://openalex.org/W2997131443","https://openalex.org/W3001197829","https://openalex.org/W3006303816","https://openalex.org/W3006334608","https://openalex.org/W3015437096","https://openalex.org/W3021708257","https://openalex.org/W3035160371","https://openalex.org/W3036619998","https://openalex.org/W3039563104","https://openalex.org/W3197594072","https://openalex.org/W3206540493","https://openalex.org/W3213330367","https://openalex.org/W4212774754","https://openalex.org/W4226146935","https://openalex.org/W4245386877","https://openalex.org/W4289288058","https://openalex.org/W4289751869","https://openalex.org/W4293057377","https://openalex.org/W4302423442","https://openalex.org/W4310922000","https://openalex.org/W4312376102","https://openalex.org/W4313598235","https://openalex.org/W4321392130","https://openalex.org/W4383108778","https://openalex.org/W4389667165","https://openalex.org/W4392182482","https://openalex.org/W4392633917","https://openalex.org/W6677285668","https://openalex.org/W6681588610","https://openalex.org/W6714590955","https://openalex.org/W6718836005","https://openalex.org/W6732417791","https://openalex.org/W6736562241","https://openalex.org/W6739585900","https://openalex.org/W6741002519","https://openalex.org/W6743368274","https://openalex.org/W6748210908","https://openalex.org/W6753526802","https://openalex.org/W6753934646","https://openalex.org/W6753960536","https://openalex.org/W6754184789","https://openalex.org/W6754878628","https://openalex.org/W6761908843","https://openalex.org/W6765939562","https://openalex.org/W6769174716","https://openalex.org/W6769596995","https://openalex.org/W6771217966","https://openalex.org/W6771673025","https://openalex.org/W6771787070","https://openalex.org/W6773005947","https://openalex.org/W6774122581","https://openalex.org/W6776867236","https://openalex.org/W6778000925","https://openalex.org/W6780559895","https://openalex.org/W6787713516","https://openalex.org/W6799308143","https://openalex.org/W6799431981","https://openalex.org/W6801328502","https://openalex.org/W6810044100","https://openalex.org/W6810339684","https://openalex.org/W6838843634","https://openalex.org/W6839156189","https://openalex.org/W6842224412","https://openalex.org/W6843699536","https://openalex.org/W6847391325","https://openalex.org/W6848774067","https://openalex.org/W6891918056","https://openalex.org/W6912781838"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Preference-based":[0],"reinforcement":[1],"learning":[2,63,140],"(RL)":[3],"poses":[4],"as":[5,107],"a":[6,55,61,179],"recent":[7,135],"research":[8],"direction":[9],"in":[10,60,178],"robot":[11,30],"learning,":[12],"by":[13,43,53,95,109,113],"allowing":[14],"humans":[15],"to":[16,27,40,85,102,134,163],"teach":[17],"robots":[18],"through":[19],"preferences":[20],"on":[21],"pairs":[22],"of":[23,36,75,79,152,168,185],"desired":[24],"behaviours.":[25],"Nonetheless,":[26],"obtain":[28],"realistic":[29],"policies,":[31],"an":[32],"arbitrarily":[33],"large":[34],"number":[35],"queries":[37,87],"is":[38],"required":[39],"be":[41,189],"answered":[42],"humans.":[44],"In":[45],"this":[46],"work,":[47],"we":[48,68,147,171],"approach":[49,83,133],"the":[50,103,119,150,153,165,186],"sample-efficiency":[51],"challenge":[52],"presenting":[54],"technique":[56],"which":[57,88],"synthesizes":[58],"queries,":[59],"semi-supervised":[62,139],"perspective.":[64],"To":[65],"achieve":[66],"this,":[67],"leverage":[69],"latent":[70],"variational":[71],"autoencoder":[72],"(VAE)":[73],"representations":[74,127],"trajectory":[76],"segments":[77],"(sequences":[78],"state-action":[80],"pairs).":[81],"Our":[82,142],"manages":[84],"produce":[86],"are":[89,128],"closely":[90],"aligned":[91],"with":[92],"those":[93],"labeled":[94],"humans,":[96],"while":[97],"avoiding":[98],"excessive":[99],"uncertainty":[100],"according":[101],"human":[104,160,176],"preference":[105],"predictions":[106],"determined":[108],"reward":[110,125,155],"estimations.":[111],"Additionally,":[112],"introducing":[114],"variation":[115],"without":[116,157],"deviating":[117],"from":[118],"original":[120],"human\u2019s":[121],"intents,":[122],"more":[123],"robust":[124],"function":[126,156],"achieved.":[129],"We":[130],"compare":[131],"our":[132,169],"state-of-the-art":[136],"preference-based":[137],"RL":[138],"techniques.":[141],"experimental":[143],"findings":[144],"reveal":[145],"that":[146],"can":[148,188],"enhance":[149],"generalization":[151],"estimated":[154],"requiring":[158],"additional":[159],"intervention.":[161],"Lastly,":[162],"confirm":[164],"practical":[166],"applicability":[167],"approach,":[170],"conduct":[172],"experiments":[173,187],"involving":[174],"actual":[175],"users":[177],"simulated":[180],"social":[181],"navigation":[182],"setting.":[183],"Videos":[184],"found":[190],"at":[191],"https://sites.google.com/view/rl-sequel":[192]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
