{"id":"https://openalex.org/W3210520478","doi":"https://doi.org/10.1145/3493700.3493716","title":"Smooth Imitation Learning via Smooth Costs and Smooth Policies","display_name":"Smooth Imitation Learning via Smooth Costs and Smooth Policies","publication_year":2022,"publication_date":"2022-01-07","ids":{"openalex":"https://openalex.org/W3210520478","doi":"https://doi.org/10.1145/3493700.3493716","mag":"3210520478"},"language":"en","primary_location":{"id":"doi:10.1145/3493700.3493716","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3493700.3493716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Joint International Conference on Data Science &amp; Management of Data (9th ACM IKDD CODS and 27th COMAD)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2111.02354","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078915483","display_name":"Sapana Chaudhary","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sapana Chaudhary","raw_affiliation_strings":["Electrical and Computer Engineering, Texas A&amp;M University, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Texas A&amp;M University, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009374923","display_name":"Balaraman Ravindran","orcid":"https://orcid.org/0000-0002-5364-7639"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]},{"id":"https://openalex.org/I4210151956","display_name":"Robert Bosch (India)","ror":"https://ror.org/04my8ty22","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210151956","https://openalex.org/I889804353"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Balaraman Ravindran","raw_affiliation_strings":["Robert Bosch Centre for Data Science and AI, India and Computer Science and Engineering, Indian Institute of Technology Madras, India"],"affiliations":[{"raw_affiliation_string":"Robert Bosch Centre for Data Science and AI, India and Computer Science and Engineering, Indian Institute of Technology Madras, India","institution_ids":["https://openalex.org/I4210151956","https://openalex.org/I24676775"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5078915483"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":0.276,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.59943443,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"63","last_page":"71"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9560999870300293,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.8681047558784485},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8507912158966064},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.7667003870010376},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7145333290100098},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6644550561904907},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5736494660377502},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4734603762626648},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4721158742904663},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.46509090065956116},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4561137855052948},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.41709017753601074},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.37798476219177246},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37019315361976624},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2922525405883789},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22302520275115967},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.11086821556091309}],"concepts":[{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.8681047558784485},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8507912158966064},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.7667003870010376},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7145333290100098},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6644550561904907},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5736494660377502},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4734603762626648},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4721158742904663},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.46509090065956116},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4561137855052948},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.41709017753601074},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.37798476219177246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37019315361976624},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2922525405883789},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22302520275115967},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.11086821556091309},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3493700.3493716","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3493700.3493716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Joint International Conference on Data Science &amp; Management of Data (9th ACM IKDD CODS and 27th COMAD)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2111.02354","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.02354","pdf_url":"https://arxiv.org/pdf/2111.02354","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2111.02354","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.02354","pdf_url":"https://arxiv.org/pdf/2111.02354","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W112666333","https://openalex.org/W139596442","https://openalex.org/W165601924","https://openalex.org/W1191599655","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1575592356","https://openalex.org/W1684361744","https://openalex.org/W1771410628","https://openalex.org/W1931877416","https://openalex.org/W1986014385","https://openalex.org/W1999874108","https://openalex.org/W2031571562","https://openalex.org/W2039708501","https://openalex.org/W2043615441","https://openalex.org/W2061562262","https://openalex.org/W2098774185","https://openalex.org/W2108734173","https://openalex.org/W2117481033","https://openalex.org/W2117675763","https://openalex.org/W2119567691","https://openalex.org/W2124267516","https://openalex.org/W2142641780","https://openalex.org/W2158782408","https://openalex.org/W2162717641","https://openalex.org/W2166302491","https://openalex.org/W2173248099","https://openalex.org/W2174803659","https://openalex.org/W2290104316","https://openalex.org/W2417264409","https://openalex.org/W2436270489","https://openalex.org/W2736601468","https://openalex.org/W2757519139","https://openalex.org/W2781726626","https://openalex.org/W2822752092","https://openalex.org/W2913266441","https://openalex.org/W2914316834","https://openalex.org/W2930826253","https://openalex.org/W2954540134","https://openalex.org/W2962369866","https://openalex.org/W2962879692","https://openalex.org/W2962902376","https://openalex.org/W2962957031","https://openalex.org/W2963277051","https://openalex.org/W2963508354","https://openalex.org/W2963590100","https://openalex.org/W2963864421","https://openalex.org/W2964121744","https://openalex.org/W2964159205","https://openalex.org/W2964201867","https://openalex.org/W2989929945","https://openalex.org/W3012708491","https://openalex.org/W3035204084","https://openalex.org/W3035493476","https://openalex.org/W3038629022","https://openalex.org/W4211008118","https://openalex.org/W4247950230","https://openalex.org/W4256613753","https://openalex.org/W4294184067","https://openalex.org/W4295521014","https://openalex.org/W4297664295","https://openalex.org/W4298023569","https://openalex.org/W4394666657"],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2778153218","https://openalex.org/W1531601525","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886"],"abstract_inverted_index":{"Imitation":[0,146],"learning":[1,27,195],"(IL)":[2],"is":[3,36,55,126],"a":[4,52,58,61,78,113,117,153],"popular":[5],"approach":[6],"in":[7,25,112],"the":[8,18,47,92,95,108,123,132,158,161,180],"continuous":[9,70,170],"control":[10,71,171],"setting":[11],"as":[12,57,116],"among":[13],"other":[14],"reasons":[15],"it":[16],"circumvents":[17],"problems":[19],"of":[20,60,67,98,119,160,193],"reward":[21],"mis-specification":[22],"and":[23,94,122,144,196],"exploration":[24],"reinforcement":[26],"(RL).":[28],"In":[29],"IL":[30,140,182],"from":[31,173],"demonstrations,":[32],"an":[33],"important":[34],"challenge":[35],"to":[37,46,131,156],"obtain":[38],"agent":[39,124],"policies":[40],"that":[41,54,107],"are":[42],"smooth":[43,56,139],"with":[44,129],"respect":[45,130],"inputs.":[48],"Learning":[49,147],"through":[50],"imitation":[51,100],"policy":[53,93,125],"function":[59,110,118],"large":[62],"state-action":[63],"(s-a)":[64],"space":[65],"(typical":[66],"high":[68],"dimensional":[69],"environments)":[72],"can":[73],"be":[74],"challenging.":[75],"We":[76,135,151,164],"take":[77],"first":[79],"step":[80],"towards":[81],"tackling":[82],"this":[83],"issue":[84],"by":[85,105],"using":[86],"smoothness":[87,159,187],"inducing":[88],"regularizers":[89,103],"on":[90,169,184],"both":[91],"cost":[96,109],"models":[97],"adversarial":[99],"learning.":[101],"Our":[102],"work":[104],"ensuring":[106],"changes":[111],"controlled":[114],"manner":[115],"s-a":[120],"space;":[121],"well":[127],"behaved":[128],"state":[133],"space.":[134],"call":[136],"our":[137,185],"new":[138],"algorithm":[141,176,183],"Smooth":[142],"Policy":[143],"Cost":[145],"(SPaCIL,":[148],"pronounced":[149],"\u201cSpecial\u201d).":[150],"introduce":[152],"novel":[154],"metric":[155],"quantify":[157],"learned":[162],"policies.":[163],"demonstrate":[165],"SPaCIL\u2019s":[166],"superior":[167],"performance":[168],"tasks":[172],"MuJoCo.":[174],"The":[175],"not":[177],"just":[178],"outperforms":[179],"state-of-the-art":[181],"proposed":[186],"metric,":[188],"but,":[189],"enjoys":[190],"added":[191],"benefits":[192],"faster":[194],"substantially":[197],"higher":[198],"average":[199],"return.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
