{"id":"https://openalex.org/W4399657811","doi":"https://doi.org/10.48550/arxiv.2406.08472","title":"RILe: Reinforced Imitation Learning","display_name":"RILe: Reinforced Imitation Learning","publication_year":2024,"publication_date":"2024-06-12","ids":{"openalex":"https://openalex.org/W4399657811","doi":"https://doi.org/10.48550/arxiv.2406.08472"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.08472","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08472","pdf_url":"https://arxiv.org/pdf/2406.08472","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.08472","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029685269","display_name":"Berat Mert Albaba","orcid":"https://orcid.org/0000-0002-3406-8412"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Albaba, Mert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048915340","display_name":"Sammy Christen","orcid":"https://orcid.org/0000-0002-3511-8565"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Christen, Sammy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099124267","display_name":"Thomas Langarek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Langarek, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008646925","display_name":"Christoph Gebhardt","orcid":"https://orcid.org/0000-0001-7162-0133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gebhardt, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025000908","display_name":"Otmar Hilliges","orcid":"https://orcid.org/0000-0002-5068-3474"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hilliges, Otmar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026037617","display_name":"Michael J. Black","orcid":"https://orcid.org/0000-0003-0571-0936"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Black, Michael J.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5029685269"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6690998673439026},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.19705671072006226},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.049017250537872314}],"concepts":[{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6690998673439026},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.19705671072006226},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.049017250537872314}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.08472","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08472","pdf_url":"https://arxiv.org/pdf/2406.08472","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.08472","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.08472","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.08472","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08472","pdf_url":"https://arxiv.org/pdf/2406.08472","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2665305151"],"abstract_inverted_index":{"Acquiring":[0],"complex":[1,180],"behaviors":[2,12],"is":[3,55],"essential":[4],"for":[5,34,87],"artificially":[6],"intelligent":[7],"agents,":[8],"yet":[9],"learning":[10,28,40,60,104,108],"these":[11],"in":[13,77,120,171,185],"high-dimensional":[14,78,121,172],"settings":[15],"poses":[16],"a":[17,63,96,111,125],"significant":[18],"challenge":[19],"due":[20],"to":[21,73,109,143,178],"the":[22,100,129,137,153,156],"vast":[23],"search":[24],"space.":[25],"Traditional":[26],"reinforcement":[27,39,107],"(RL)":[29],"requires":[30],"extensive":[31],"manual":[32],"effort":[33],"reward":[35,43,113,134,141],"function":[36,114],"engineering.":[37],"Inverse":[38],"(IRL)":[41],"uncovers":[42],"functions":[44],"from":[45],"expert":[46,74,145],"demonstrations":[47],"but":[48],"relies":[49],"on":[50],"an":[51,70,132],"iterative":[52],"process":[53],"that":[54,98,191],"often":[56,83],"computationally":[57],"expensive.":[58],"Imitation":[59,94],"(IL)":[61],"provides":[62,158],"more":[64],"efficient":[65],"alternative":[66],"by":[67],"directly":[68],"comparing":[69],"agent's":[71],"actions":[72],"demonstrations;":[75],"however,":[76],"environments,":[79],"such":[80],"direct":[81,175],"comparisons":[82],"offer":[84],"insufficient":[85],"feedback":[86,160],"effective":[88],"learning.":[89,165],"We":[90,182],"introduce":[91],"RILe":[92,123,184],"(Reinforced":[93],"Learning),":[95],"framework":[97,167],"combines":[99],"strengths":[101],"of":[102,164],"imitation":[103,176],"and":[105,116,136,197],"inverse":[106],"learn":[110],"dense":[112],"efficiently":[115],"achieve":[117],"strong":[118],"performance":[119,200],"tasks.":[122],"employs":[124],"novel":[126],"trainer-student":[127],"framework:":[128],"trainer":[130,157],"learns":[131],"adaptive":[133],"function,":[135],"student":[138,154],"uses":[139],"this":[140],"signal":[142],"imitate":[144],"behaviors.":[146,181],"By":[147],"dynamically":[148],"adjusting":[149],"its":[150],"guidance":[151],"as":[152],"evolves,":[155],"nuanced":[159],"across":[161,201],"different":[162],"phases":[163],"Our":[166],"produces":[168],"high-performing":[169],"policies":[170],"tasks":[173],"where":[174],"fails":[177],"replicate":[179],"validate":[183],"challenging":[186],"robotic":[187],"locomotion":[188],"tasks,":[189],"demonstrating":[190],"it":[192],"significantly":[193],"outperforms":[194],"existing":[195],"methods":[196],"achieves":[198],"near-expert":[199],"multiple":[202],"settings.":[203]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2025-10-10T00:00:00"}
