{"id":"https://openalex.org/W4401857047","doi":"https://doi.org/10.1145/3637528.3672059","title":"Offline Imitation Learning with Model-based Reverse Augmentation","display_name":"Offline Imitation Learning with Model-based Reverse Augmentation","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401857047","doi":"https://doi.org/10.1145/3637528.3672059"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3672059","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3672059","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087294333","display_name":"Jie-Jing Shao","orcid":"https://orcid.org/0000-0001-8107-114X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie-Jing Shao","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108783224","display_name":"H. C. Shi","orcid":"https://orcid.org/0009-0009-1349-586X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao-Sen Shi","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047808444","display_name":"Lan-Zhe Guo","orcid":"https://orcid.org/0000-0001-8965-1288"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lan-Zhe Guo","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, School of Intelligence Science and Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, School of Intelligence Science and Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100355152","display_name":"Yu-Feng Li","orcid":"https://orcid.org/0000-0002-7727-4304"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu-Feng Li","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087294333"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":1.2464,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80739028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2608","last_page":"2617"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7267681360244751},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5805341005325317},{"id":"https://openalex.org/keywords/reverse-engineering","display_name":"Reverse engineering","score":0.4162582457065582},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39440983533859253},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12684091925621033},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10279539227485657}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7267681360244751},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5805341005325317},{"id":"https://openalex.org/C207850805","wikidata":"https://www.wikidata.org/wiki/Q269608","display_name":"Reverse engineering","level":2,"score":0.4162582457065582},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39440983533859253},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12684091925621033},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10279539227485657},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3672059","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3672059","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/2","score":0.5799999833106995,"display_name":"Zero hunger"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W189742998","https://openalex.org/W1583837637","https://openalex.org/W2018987210","https://openalex.org/W2038901583","https://openalex.org/W2620239873","https://openalex.org/W2976205474","https://openalex.org/W3034819467","https://openalex.org/W3138984732","https://openalex.org/W3157893055","https://openalex.org/W3178520484","https://openalex.org/W3216656735","https://openalex.org/W4210444794","https://openalex.org/W4212774754","https://openalex.org/W4225737910","https://openalex.org/W4226507192","https://openalex.org/W4307648541","https://openalex.org/W6600727173"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2748952813","https://openalex.org/W1531601525"],"abstract_inverted_index":{"In":[0],"offline":[1,131,148,229],"Imitation":[2,132],"Learning":[3,133],"(IL),":[4],"one":[5],"of":[6,43,84],"the":[7,11,15,19,24,40,44,48,56,62,82,97,147,157,167,175,191,207,219,224,228],"main":[8],"challenges":[9],"is":[10,28],"covariate":[12,220],"shift":[13,221],"between":[14],"expert":[16,45,85,208],"observations":[17],"and":[18,54,76,178,222],"actual":[20],"distribution":[21,42],"encountered":[22],"by":[23],"agent,":[25],"because":[26,100],"it":[27],"difficult":[29],"to":[30,60,105,156,172,183],"determine":[31],"what":[32],"action":[33,113],"an":[34],"agent":[35],"should":[36],"take":[37],"when":[38],"outside":[39],"state":[41],"demonstrations.":[46,86],"Recently,":[47],"model-free":[49],"solutions":[50,68],"introduced":[51],"supplementary":[52],"data":[53],"identified":[55],"latent":[57],"expert-similar":[58],"samples":[59,64],"augment":[61],"reliable":[63],"during":[65],"learning.":[66],"Model-based":[67],"build":[69,141],"forward":[70],"dynamic":[71,144],"models":[72],"with":[73,134],"conservatism":[74],"quantification":[75],"then":[77],"generate":[78,153],"additional":[79],"trajectories":[80,154,177],"in":[81,96,102,160],"neighborhood":[83],"However,":[87],"without":[88],"reward":[89],"supervision,":[90],"these":[91,201],"methods":[92],"are":[93],"often":[94],"over-conservative":[95],"out-of-expert-support":[98],"regions,":[99],"only":[101,189],"states":[103,107,159,182,193],"close":[104],"expert-observed":[106,158,184],"can":[108,151],"there":[109],"be":[110],"a":[111,126,142,161],"preferred":[112],"enabling":[114,204],"policy":[115],"optimization.":[116],"To":[117],"encourage":[118],"more":[119],"exploration":[120],"on":[121,200,227],"expert-unobserved":[122,181,192],"states,":[123,202],"we":[124,140,165],"propose":[125],"novel":[127],"model-based":[128],"framework,":[129],"called":[130],"Self-paced":[135],"Reverse":[136],"Augmentation":[137],"(SRA).":[138],"Specifically,":[139],"reverse":[143],"model":[145],"from":[146,174,180],"demonstrations,":[149],"which":[150],"efficiently":[152],"leading":[155],"self-paced":[162],"style.":[163],"Then,":[164],"use":[166],"subsequent":[168],"reinforcement":[169],"learning":[170,231],"method":[171],"learn":[173],"augmented":[176],"transit":[179],"states.":[185],"This":[186],"framework":[187],"not":[188],"explores":[190],"but":[194],"also":[195],"guides":[196],"maximizing":[197],"long-term":[198],"returns":[199],"ultimately":[203],"generalization":[205],"beyond":[206],"data.":[209],"Empirical":[210],"results":[211],"show":[212],"that":[213],"our":[214],"proposal":[215],"could":[216],"effectively":[217],"mitigate":[218],"achieve":[223],"state-of-the-art":[225],"performance":[226],"imitation":[230],"benchmarks.":[232],"Project":[233],"website:":[234],"https://www.lamda.nju.edu.cn/shaojj/KDD24_SRA/.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
