{"id":"https://openalex.org/W4380715556","doi":"https://doi.org/10.1145/3580305.3599506","title":"Skill Disentanglement for Imitation Learning from Suboptimal Demonstrations","display_name":"Skill Disentanglement for Imitation Learning from Suboptimal Demonstrations","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4380715556","doi":"https://doi.org/10.1145/3580305.3599506"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599506","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599506","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.07919","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053042660","display_name":"Tianxiang Zhao","orcid":"https://orcid.org/0000-0003-4504-7809"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tianxiang Zhao","raw_affiliation_strings":["The Pennsylvania State University, State College, PA, USA"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103210504","display_name":"Wenchao Yu","orcid":"https://orcid.org/0000-0002-2480-448X"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenchao Yu","raw_affiliation_strings":["NEC-Labs America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011048500","display_name":"Suhang Wang","orcid":"https://orcid.org/0000-0003-3448-4878"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suhang Wang","raw_affiliation_strings":["The Pennsylvania State University, State College, PA, USA"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083964854","display_name":"Lu Wang","orcid":"https://orcid.org/0000-0002-7305-1496"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Wang","raw_affiliation_strings":["East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060725887","display_name":"X. D. Zhang","orcid":"https://orcid.org/0000-0003-0940-6595"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiang Zhang","raw_affiliation_strings":["The Pennsylvania State University, State College, PA, USA"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101645187","display_name":"Yuncong Chen","orcid":"https://orcid.org/0000-0001-5111-3716"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuncong Chen","raw_affiliation_strings":["NEC-Labs America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101594832","display_name":"Yanchi Liu","orcid":"https://orcid.org/0000-0003-4396-5139"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanchi Liu","raw_affiliation_strings":["NEC-Labs America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037724644","display_name":"Wei Cheng","orcid":"https://orcid.org/0000-0001-5456-626X"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Cheng","raw_affiliation_strings":["NEC-Labs America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100456786","display_name":"Haifeng Chen","orcid":"https://orcid.org/0000-0002-9363-738X"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haifeng Chen","raw_affiliation_strings":["NEC-Labs America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5053042660"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":0.8641,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78341518,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3513","last_page":"3524"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8521031141281128},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7855864763259888},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5642813444137573},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5634459853172302},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5188525915145874},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5177639722824097},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.4869014322757721},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.47332674264907837}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8521031141281128},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7855864763259888},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5642813444137573},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5634459853172302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5188525915145874},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5177639722824097},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.4869014322757721},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.47332674264907837},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3580305.3599506","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599506","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2306.07919","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.07919","pdf_url":"https://arxiv.org/pdf/2306.07919","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.07919","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.07919","pdf_url":"https://arxiv.org/pdf/2306.07919","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2353918249","display_name":null,"funder_award_id":"IIS-1909702","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4271711841","display_name":null,"funder_award_id":"-1909702","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5700204612","display_name":null,"funder_award_id":"IIS-190","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7366345995","display_name":null,"funder_award_id":"1909702","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8080813138","display_name":null,"funder_award_id":"W911NF21-1-0198","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8998121839","display_name":null,"funder_award_id":"911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380715556.pdf","grobid_xml":"https://content.openalex.org/works/W4380715556.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W2098774185","https://openalex.org/W2109910161","https://openalex.org/W2111020392","https://openalex.org/W2147544021","https://openalex.org/W2155653793","https://openalex.org/W2201912979","https://openalex.org/W2280404143","https://openalex.org/W2395738761","https://openalex.org/W2573393487","https://openalex.org/W2592373391","https://openalex.org/W2594829461","https://openalex.org/W2751444262","https://openalex.org/W2788741142","https://openalex.org/W2887997457","https://openalex.org/W2899867782","https://openalex.org/W2914824760","https://openalex.org/W2997088366","https://openalex.org/W3003533476","https://openalex.org/W3004534398","https://openalex.org/W3012628688","https://openalex.org/W3082132890","https://openalex.org/W3127561923","https://openalex.org/W3164747806","https://openalex.org/W3183876471","https://openalex.org/W3199243876","https://openalex.org/W3209866168","https://openalex.org/W3210702899","https://openalex.org/W4213384663","https://openalex.org/W4224316953","https://openalex.org/W4226373607","https://openalex.org/W4281620485","https://openalex.org/W4286562657","https://openalex.org/W4286899287","https://openalex.org/W4312045499","https://openalex.org/W6631190155","https://openalex.org/W6683443546","https://openalex.org/W6683821272","https://openalex.org/W6718092244","https://openalex.org/W6727349600","https://openalex.org/W6752051073","https://openalex.org/W6753579488","https://openalex.org/W6764724164","https://openalex.org/W6789793951","https://openalex.org/W6790829992","https://openalex.org/W6802853800","https://openalex.org/W6803978876"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W1986582023","https://openalex.org/W2966829450"],"abstract_inverted_index":{"Imitation":[0],"learning":[1,221],"has":[2],"achieved":[3],"great":[4],"success":[5],"in":[6,11,53,58,197,220],"many":[7,90],"sequential":[8],"decision-making":[9],"tasks,":[10],"which":[12],"a":[13,28,42,70,76,94,145,152,163,187,210],"neural":[14],"agent":[15],"is":[16,160],"learned":[17,113,231],"by":[18,124,166,229],"imitating":[19,127],"collected":[20],"human":[21],"demonstrations.":[22],"However,":[23],"existing":[24],"algorithms":[25],"typically":[26],"require":[27],"large":[29,77],"number":[30],"of":[31,65,98,135,144,218],"high-quality":[32],"demonstrations":[33,172,224],"that":[34],"are":[35,192,203],"difficult":[36],"and":[37,51,75,105,126,151,159,173,186,209,225],"expensive":[38],"to":[39,45,96,107,148,155,178,194,214],"collect.":[40],"Usually,":[41],"trade-off":[43],"needs":[44],"be":[46,97],"made":[47],"between":[48],"demonstration":[49,73,95],"quality":[50,137],"quantity":[52],"practice.":[54],"Targeting":[55],"this":[56,59],"problem,":[57],"work":[60],"we":[61,121],"consider":[62],"the":[63,99,115,129,176,180,198,216],"imitation":[64],"sub-optimal":[66,223],"demonstrations,":[67],"with":[68,170],"both":[69],"small":[71],"clean":[72,181],"set":[74],"noisy":[78,116],"set.":[79,117,182],"Some":[80],"pioneering":[81],"works":[82],"have":[83],"been":[84],"proposed,":[85],"but":[86],"they":[87],"suffer":[88],"from":[89,114,222],"limitations,":[91],"e.g.,":[92],"assuming":[93],"same":[100],"optimality":[101,190],"throughout":[102],"time":[103],"steps":[104],"failing":[106],"provide":[108],"any":[109],"interpretation":[110],"w.r.t":[111],"knowledge":[112],"Addressing":[118],"these":[119],"problems,":[120],"propose":[122],"\\method":[123],"evaluating":[125],"at":[128],"sub-demonstration":[130,189],"level,":[131],"encoding":[132],"action":[133],"primitives":[134],"varying":[136],"into":[138],"different":[139],"skills.":[140,232],"Concretely,":[141],"SDIL":[142,219],"consists":[143],"high-level":[146],"controller":[147,177],"discover":[149],"skills":[150,169],"skill-conditioned":[153],"module":[154],"capture":[156],"action-taking":[157],"policies,":[158],"trained":[161],"following":[162],"two-phase":[164],"pipeline":[165],"first":[167],"discovering":[168],"all":[171],"then":[174],"adapting":[175],"only":[179],"A":[183],"mutual-information-based":[184],"regularization":[185],"dynamic":[188],"estimator":[191],"designed":[193],"promote":[195],"disentanglement":[196],"skill":[199],"space.":[200],"Extensive":[201],"experiments":[202],"conducted":[204],"over":[205],"two":[206],"gym":[207],"environments":[208],"real-world":[211],"healthcare":[212],"dataset":[213],"demonstrate":[215],"superiority":[217],"its":[226],"improved":[227],"interpretability":[228],"examining":[230]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
