{"id":"https://openalex.org/W4401415577","doi":"https://doi.org/10.1109/icra57147.2024.10610528","title":"Contrastive Initial State Buffer for Reinforcement Learning","display_name":"Contrastive Initial State Buffer for Reinforcement Learning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415577","doi":"https://doi.org/10.1109/icra57147.2024.10610528"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045918081","display_name":"Nico Messikommer","orcid":"https://orcid.org/0000-0003-1444-1176"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Nico Messikommer","raw_affiliation_strings":["University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland"],"affiliations":[{"raw_affiliation_string":"University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085971429","display_name":"Yunlong Song","orcid":"https://orcid.org/0000-0002-6352-3744"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Yunlong Song","raw_affiliation_strings":["University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland"],"affiliations":[{"raw_affiliation_string":"University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057116316","display_name":"Davide Scaramuzza","orcid":"https://orcid.org/0000-0002-3831-6778"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Davide Scaramuzza","raw_affiliation_strings":["University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland"],"affiliations":[{"raw_affiliation_string":"University of Zurich,Robotics and Perception Group, Department of Informatics,Switzerland","institution_ids":["https://openalex.org/I202697423"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5045918081"],"corresponding_institution_ids":["https://openalex.org/I202697423"],"apc_list":null,"apc_paid":null,"fwci":2.9009,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91931568,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2866","last_page":"2872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8120999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8120999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7941122055053711},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7054170966148376},{"id":"https://openalex.org/keywords/buffer","display_name":"Buffer (optical fiber)","score":0.6263384819030762},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4907315969467163},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4855744242668152},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4696235954761505},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.10479432344436646},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09521955251693726},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.059014737606048584},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.05388909578323364}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7941122055053711},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7054170966148376},{"id":"https://openalex.org/C145018004","wikidata":"https://www.wikidata.org/wiki/Q4985944","display_name":"Buffer (optical fiber)","level":2,"score":0.6263384819030762},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4907315969467163},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4855744242668152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4696235954761505},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.10479432344436646},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09521955251693726},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.059014737606048584},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.05388909578323364}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334678","display_name":"European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2201581102","https://openalex.org/W2607662938","https://openalex.org/W2738195924","https://openalex.org/W2747329762","https://openalex.org/W2796290181","https://openalex.org/W2894766094","https://openalex.org/W2904157920","https://openalex.org/W2911087563","https://openalex.org/W2963276097","https://openalex.org/W2963439114","https://openalex.org/W3018036994","https://openalex.org/W3082697078","https://openalex.org/W3100789280","https://openalex.org/W3104515094","https://openalex.org/W3126321819","https://openalex.org/W3129322645","https://openalex.org/W3207452936","https://openalex.org/W3213974477","https://openalex.org/W4205430897","https://openalex.org/W4206742276","https://openalex.org/W4297808394","https://openalex.org/W4298206671","https://openalex.org/W4300799055","https://openalex.org/W4307823382","https://openalex.org/W4312441173","https://openalex.org/W4386285856","https://openalex.org/W4386718967","https://openalex.org/W4401414310","https://openalex.org/W4401417288","https://openalex.org/W6627932998","https://openalex.org/W6638018090","https://openalex.org/W6687681856","https://openalex.org/W6717230150","https://openalex.org/W6740801417","https://openalex.org/W6741302124","https://openalex.org/W6751494529","https://openalex.org/W6756303580","https://openalex.org/W6757288412","https://openalex.org/W6758854760","https://openalex.org/W6771807793","https://openalex.org/W6782364735","https://openalex.org/W6800004206","https://openalex.org/W6801964084","https://openalex.org/W6838065111","https://openalex.org/W6844194202"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2097227107","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291"],"abstract_inverted_index":{"In":[0],"Reinforcement":[1],"Learning,":[2],"the":[3,36,47,53,73,76,103,137],"trade-off":[4],"between":[5],"exploration":[6],"and":[7,68,114],"exploitation":[8],"poses":[9],"a":[10,56,108,116,121],"complex":[11,93],"challenge":[12],"for":[13,30,42],"achieving":[14],"efficient":[15],"learning":[16],"from":[17,65],"limited":[18],"samples.":[19],"While":[20],"recent":[21],"works":[22],"have":[23],"been":[24],"effective":[25],"in":[26,75,78],"leveraging":[27],"past":[28,40,66],"experiences":[29,41,67],"policy":[31],"updates,":[32],"they":[33],"often":[34],"overlook":[35],"potential":[37],"of":[38,46,55,107],"reusing":[39],"data":[43],"collection.":[44],"Independent":[45],"underlying":[48],"RL":[49],"algorithm,":[50],"we":[51],"introduce":[52],"concept":[54],"Contrastive":[57],"Initial":[58],"State":[59],"Buffer,":[60],"which":[61],"strategically":[62],"selects":[63],"states":[64],"uses":[69],"them":[70],"to":[71,80],"initialize":[72],"agent":[74],"environment":[77],"order":[79],"guide":[81],"it":[82],"toward":[83],"more":[84],"informative":[85],"states.":[86],"We":[87],"validate":[88],"our":[89,128],"approach":[90],"on":[91,98],"two":[92],"robotic":[94],"tasks":[95],"without":[96],"relying":[97],"any":[99],"prior":[100],"information":[101],"about":[102],"environment:":[104],"(i)":[105],"locomotion":[106],"quadruped":[109],"robot":[110],"traversing":[111],"challenging":[112],"terrains":[113],"(ii)":[115],"quadcopter":[117],"drone":[118],"racing":[119],"through":[120],"track.":[122],"The":[123],"experimental":[124],"results":[125],"show":[126],"that":[127],"initial":[129],"state":[130],"buffer":[131],"achieves":[132],"higher":[133],"task":[134],"performance":[135],"than":[136],"nominal":[138],"baseline":[139],"while":[140],"also":[141],"speeding":[142],"up":[143],"training":[144],"convergence.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
