{"id":"https://openalex.org/W4413945504","doi":"https://doi.org/10.1109/icra55743.2025.11128275","title":"Efficient Imitation Without Demonstrations via Value-Penalized Auxiliary Control from Examples","display_name":"Efficient Imitation Without Demonstrations via Value-Penalized Auxiliary Control from Examples","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413945504","doi":"https://doi.org/10.1109/icra55743.2025.11128275"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11128275","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128275","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036575954","display_name":"Trevor Ablett","orcid":"https://orcid.org/0000-0001-5904-8402"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Trevor Ablett","raw_affiliation_strings":["University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053699492","display_name":"Bryan Chan","orcid":"https://orcid.org/0000-0002-6278-9575"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Bryan Chan","raw_affiliation_strings":["University of Alberta,Department of Computing Science,Edmonton,Alberta,Canada,T6G 2E8"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Alberta,Department of Computing Science,Edmonton,Alberta,Canada,T6G 2E8","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111250655","display_name":"Jayce Haoran Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jayce Haoran Wang","raw_affiliation_strings":["University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011931977","display_name":"Jonathan W. Kelly","orcid":"https://orcid.org/0000-0002-4317-273X"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jonathan Kelly","raw_affiliation_strings":["University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10820333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2333","last_page":"2339"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.9276999831199646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9161999821662903,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5669071078300476},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5666897892951965},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5589181184768677},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5087998509407043},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28505194187164307},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.15740644931793213},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.06898483633995056}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5669071078300476},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5666897892951965},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5589181184768677},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5087998509407043},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28505194187164307},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.15740644931793213},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06898483633995056},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11128275","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128275","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1994349244","https://openalex.org/W2109910161","https://openalex.org/W2938421504","https://openalex.org/W2963099939","https://openalex.org/W2963411833","https://openalex.org/W3174849255","https://openalex.org/W4316021894","https://openalex.org/W4391827290"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2899084033","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2748952813","https://openalex.org/W1531601525"],"abstract_inverted_index":{"Common":[0],"approaches":[1,102],"to":[2],"providing":[3],"feedback":[4],"in":[5,49],"reinforcement":[6],"learning":[7,78],"are":[8],"the":[9,99],"use":[10,21],"of":[11,23,55,103],"hand-crafted":[12],"rewards":[13],"or":[14,107],"full-trajectory":[15],"expert":[16],"demonstrations.":[17],"Alternatively,":[18],"one":[19],"can":[20,30],"examples":[22,41,54],"completed":[24],"tasks,":[25,82],"but":[26],"such":[27],"an":[28,43,60],"approach":[29,75],"be":[31],"extremely":[32],"sample":[33],"inefficient.":[34],"We":[35],"introduce":[36],"value-penalized":[37],"auxiliary":[38,57],"control":[39,51],"from":[40],"(VPACE),":[42],"algorithm":[44],"that":[45,73,92],"significantly":[46],"improves":[47,77],"exploration":[48],"example-based":[50],"by":[52],"adding":[53],"simple":[56],"tasks":[58],"and":[59,67],"above-success-level":[61],"value":[62,86],"penalty.":[63],"Across":[64],"both":[65],"simulated":[66],"real":[68],"robotic":[69],"environments,":[70],"we":[71],"show":[72],"our":[74],"substantially":[76],"efficiency":[79],"for":[80],"challenging":[81],"while":[83],"maintaining":[84],"bounded":[85],"estimates.":[87],"Preliminary":[88],"results":[89],"also":[90],"suggest":[91],"VPACE":[93],"may":[94],"learn":[95],"more":[96,100],"efficiently":[97],"than":[98],"common":[101],"using":[104],"full":[105],"trajectories":[106],"true":[108],"sparse":[109],"rewards.":[110],"Project":[111],"site:":[112],"https://papers.starslab.ca/vpace/.":[113]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
