{"id":"https://openalex.org/W2963129648","doi":"https://doi.org/10.1109/lra.2018.2869644","title":"Reinforced Imitation: Sample Efficient Deep Reinforcement Learning for Mapless Navigation by Leveraging Prior Demonstrations","display_name":"Reinforced Imitation: Sample Efficient Deep Reinforcement Learning for Mapless Navigation by Leveraging Prior Demonstrations","publication_year":2018,"publication_date":"2018-09-13","ids":{"openalex":"https://openalex.org/W2963129648","doi":"https://doi.org/10.1109/lra.2018.2869644","mag":"2963129648"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2018.2869644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2869644","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040054567","display_name":"Mark G. Pfeiffer","orcid":"https://orcid.org/0000-0003-0276-324X"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Mark Pfeiffer","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-0276-324X","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108013003","display_name":"Samarth Shukla","orcid":"https://orcid.org/0000-0002-1670-5211"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Samarth Shukla","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-1670-5211","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026401995","display_name":"Matteo Turchetta","orcid":"https://orcid.org/0000-0001-5881-3096"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Matteo Turchetta","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0001-5881-3096","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063529163","display_name":"C\u00e9sar Cadena","orcid":"https://orcid.org/0000-0002-2972-6011"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Cesar Cadena","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-2972-6011","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003040843","display_name":"Andreas Krause","orcid":"https://orcid.org/0000-0001-7260-9673"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Andreas Krause","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083003222","display_name":"Roland Siegwart","orcid":"https://orcid.org/0000-0002-2760-7983"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Roland Siegwart","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-2760-7983","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059792297","display_name":"Juan Nieto","orcid":"https://orcid.org/0000-0003-4808-0831"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Juan Nieto","raw_affiliation_strings":["Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-4808-0831","affiliations":[{"raw_affiliation_string":"Autonomous Systems Lab, Computer Vision Lab, Learning and Adaptive Systems Group, and Max Planck ETH Center for Learning Systems, ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.7983,"has_fulltext":false,"cited_by_count":198,"citation_normalized_percentile":{"value":0.98342098,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"3","issue":"4","first_page":"4423","last_page":"4430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8333206176757812},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7592679262161255},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6655831336975098},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6202395558357239},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5613070726394653},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5504662394523621},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.5367357134819031},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4615805745124817},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4313107132911682}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8333206176757812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7592679262161255},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6655831336975098},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6202395558357239},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5613070726394653},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5504662394523621},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.5367357134819031},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4615805745124817},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4313107132911682},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2018.2869644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2869644","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1424654272","https://openalex.org/W1504697605","https://openalex.org/W1771410628","https://openalex.org/W1931877416","https://openalex.org/W1976953317","https://openalex.org/W1980969546","https://openalex.org/W2087751887","https://openalex.org/W2119112357","https://openalex.org/W2133233905","https://openalex.org/W2137549137","https://openalex.org/W2142828048","https://openalex.org/W2218761850","https://openalex.org/W2227909145","https://openalex.org/W2398883595","https://openalex.org/W2524241275","https://openalex.org/W2580495915","https://openalex.org/W2604216058","https://openalex.org/W2757515096","https://openalex.org/W2770679144","https://openalex.org/W2901136733","https://openalex.org/W2962887844","https://openalex.org/W2962951365","https://openalex.org/W2962954724","https://openalex.org/W2963019567","https://openalex.org/W2963277051","https://openalex.org/W2963428623","https://openalex.org/W2963713397","https://openalex.org/W2964043796","https://openalex.org/W3099533866","https://openalex.org/W4230563027","https://openalex.org/W4250696540","https://openalex.org/W4293545785","https://openalex.org/W4294555834","https://openalex.org/W4297795161","https://openalex.org/W6638018090","https://openalex.org/W6640174482","https://openalex.org/W6679847170","https://openalex.org/W6688685930","https://openalex.org/W6692846177","https://openalex.org/W6712227361","https://openalex.org/W6718092244","https://openalex.org/W6731334075","https://openalex.org/W6737893269","https://openalex.org/W6738483526","https://openalex.org/W6744844802","https://openalex.org/W6746348303","https://openalex.org/W6756486208"],"related_works":["https://openalex.org/W3173606202","https://openalex.org/W1531601525","https://openalex.org/W2607437843","https://openalex.org/W2963162727","https://openalex.org/W2877093712","https://openalex.org/W2116157560","https://openalex.org/W2106714532","https://openalex.org/W4297979791","https://openalex.org/W4310614650","https://openalex.org/W4386738330"],"abstract_inverted_index":{"This":[0],"letter":[1],"presents":[2],"a":[3,7,27,45,84,90,110,148],"case":[4],"study":[5],"of":[6,29,76,86,93,96,128],"learning-based":[8],"approach":[9],"for":[10,62],"target-driven":[11],"mapless":[12],"navigation.":[13],"The":[14,152],"underlying":[15],"navigation":[16,130,154],"model":[17,120],"is":[18,24,156],"an":[19],"end-to-end":[20],"neural":[21],"network,":[22],"which":[23],"trained":[25],"using":[26,143,147],"combination":[28],"expert":[30,60,97],"demonstrations,":[31,98],"imitation":[32],"learning":[33,37],"(IL)":[34],"and":[35,41,49,102,108,162],"reinforcement":[36],"(RL).":[38],"While":[39],"RL":[40,82,100,135],"IL":[42],"suffer":[43],"from":[44],"large":[46],"sample":[47],"complexity":[48],"the":[50,66,73,77,118,126,134],"distribution":[51],"mismatch":[52],"problem,":[53],"respectively,":[54],"we":[55],"show":[56,116],"that":[57,117],"leveraging":[58],"prior":[59],"demonstrations":[61],"pretraining":[63],"can":[64,138],"reduce":[65],"training":[67],"time":[68],"to":[69,80,158,160],"reach":[70],"at":[71],"least":[72],"same":[74],"level":[75],"performance":[78],"compared":[79],"plain":[81],"by":[83,146],"factor":[85],"5.":[87],"We":[88],"present":[89],"thorough":[91],"evaluation":[92],"different":[94,99],"combinations":[95],"algorithms,":[101],"reward":[103,136,150],"functions,":[104],"both":[105,122],"in":[106,125],"simulation":[107],"on":[109],"real":[111],"robotic":[112],"platform.":[113],"Our":[114],"results":[115],"final":[119],"outperforms":[121],"standalone":[123],"approaches":[124],"amount":[127],"successful":[129],"tasks.":[131],"In":[132],"addition,":[133],"function":[137],"be":[139],"significantly":[140],"simplified":[141],"when":[142],"pretraining,":[144],"e.g.,":[145],"sparse":[149],"only.":[151],"learned":[153],"policy":[155],"able":[157],"generalize":[159],"unseen":[161],"real-world":[163],"environments.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":25},{"year":2023,"cited_by_count":24},{"year":2022,"cited_by_count":37},{"year":2021,"cited_by_count":32},{"year":2020,"cited_by_count":34},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
