{"id":"https://openalex.org/W2947376031","doi":"https://doi.org/10.24963/ijcai.2019/374","title":"Interactive Teaching Algorithms for Inverse Reinforcement Learning","display_name":"Interactive Teaching Algorithms for Inverse Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-28","ids":{"openalex":"https://openalex.org/W2947376031","doi":"https://doi.org/10.24963/ijcai.2019/374","mag":"2947376031"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2019/374","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/374","pdf_url":"https://www.ijcai.org/proceedings/2019/0374.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2019/0374.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090876613","display_name":"Parameswaran Kamalaruban","orcid":"https://orcid.org/0000-0002-2929-7886"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Parameswaran Kamalaruban","raw_affiliation_strings":["LIONS, EPFL","\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne,"],"affiliations":[{"raw_affiliation_string":"LIONS, EPFL","institution_ids":[]},{"raw_affiliation_string":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne,","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005193721","display_name":"Rati Devidze","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]},{"id":"https://openalex.org/I4210121786","display_name":"Max Planck Institute for Software Systems","ror":"https://ror.org/02pe2kf23","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210121786"]}],"countries":["CH","DE"],"is_corresponding":false,"raw_author_name":"Rati Devidze","raw_affiliation_strings":["Max Planck Institute for Software Systems (MPI-SWS)","\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne,"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Software Systems (MPI-SWS)","institution_ids":["https://openalex.org/I4210121786"]},{"raw_affiliation_string":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne,","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027059837","display_name":"Volkan Cevher","orcid":"https://orcid.org/0000-0002-5004-201X"},"institutions":[{"id":"https://openalex.org/I4210121786","display_name":"Max Planck Institute for Software Systems","ror":"https://ror.org/02pe2kf23","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210121786"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Volkan Cevher","raw_affiliation_strings":["LIONS, EPFL","Max Planck Institute for Software Systems,#TAB#"],"affiliations":[{"raw_affiliation_string":"LIONS, EPFL","institution_ids":[]},{"raw_affiliation_string":"Max Planck Institute for Software Systems,#TAB#","institution_ids":["https://openalex.org/I4210121786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027711113","display_name":"Adish Singla","orcid":"https://orcid.org/0000-0001-9922-0668"},"institutions":[{"id":"https://openalex.org/I4210121786","display_name":"Max Planck Institute for Software Systems","ror":"https://ror.org/02pe2kf23","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210121786"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Adish Singla","raw_affiliation_strings":["Max Planck Institute for Software Systems (MPI-SWS)","Max Planck Institute for Software Systems,#TAB#"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Software Systems (MPI-SWS)","institution_ids":["https://openalex.org/I4210121786"]},{"raw_affiliation_string":"Max Planck Institute for Software Systems,#TAB#","institution_ids":["https://openalex.org/I4210121786"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090876613"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":1.1566,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.84066057,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2692","last_page":"2700"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7202243208885193},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6637155413627625},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5940074324607849},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.576332151889801},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5382961630821228},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.48205527663230896},{"id":"https://openalex.org/keywords/sequence-learning","display_name":"Sequence learning","score":0.4212872087955475},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.4056745171546936},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3466659188270569},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33817702531814575},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13571491837501526},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08774960041046143}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7202243208885193},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6637155413627625},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5940074324607849},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.576332151889801},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5382961630821228},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.48205527663230896},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.4212872087955475},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.4056745171546936},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3466659188270569},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33817702531814575},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13571491837501526},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08774960041046143},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.24963/ijcai.2019/374","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/374","pdf_url":"https://www.ijcai.org/proceedings/2019/0374.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.11867","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.11867","pdf_url":"https://arxiv.org/pdf/1905.11867","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"mag:2947376031","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1905.11867","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:pure.mpg.de:item_3217345","is_oa":false,"landing_page_url":"http://hdl.handle.net/21.11116/0000-0005-F96D-9","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"doi:10.48550/arxiv.1905.11867","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.11867","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2019/374","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/374","pdf_url":"https://www.ijcai.org/proceedings/2019/0374.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7400000095367432,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G122785726","display_name":"Theory and methods for accurate and scalable learning machines","funder_award_id":"167319","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G1934557032","display_name":null,"funder_award_id":"407540_167319","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8674641499","display_name":"The Transformation of Public and Private European Competition Law","funder_award_id":"67319","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G879380985","display_name":"Topology of intracellular synthesis and plasma membrane dis- tribution of glycoconjugates","funder_award_id":"40754","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2947376031.pdf","grobid_xml":"https://content.openalex.org/works/W2947376031.grobid-xml"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W64088143","https://openalex.org/W950880443","https://openalex.org/W1529399279","https://openalex.org/W1541165948","https://openalex.org/W1544882584","https://openalex.org/W1633675443","https://openalex.org/W1684361744","https://openalex.org/W1777239053","https://openalex.org/W1929981607","https://openalex.org/W1986014385","https://openalex.org/W1994434056","https://openalex.org/W1999874108","https://openalex.org/W2020764470","https://openalex.org/W2053910308","https://openalex.org/W2061562262","https://openalex.org/W2062427462","https://openalex.org/W2070469928","https://openalex.org/W2097625105","https://openalex.org/W2098774185","https://openalex.org/W2103104707","https://openalex.org/W2133068870","https://openalex.org/W2142544755","https://openalex.org/W2148112459","https://openalex.org/W2148886952","https://openalex.org/W2160178500","https://openalex.org/W2162009473","https://openalex.org/W2163602945","https://openalex.org/W2290104316","https://openalex.org/W2557026499","https://openalex.org/W2563829177","https://openalex.org/W2577466617","https://openalex.org/W2621205314","https://openalex.org/W2735318784","https://openalex.org/W2767127679","https://openalex.org/W2777985721","https://openalex.org/W2783793006","https://openalex.org/W2786676179","https://openalex.org/W2794908222","https://openalex.org/W2803661326","https://openalex.org/W2804214746","https://openalex.org/W2808642110","https://openalex.org/W2890752237","https://openalex.org/W2947376031","https://openalex.org/W2950230866","https://openalex.org/W2962687617","https://openalex.org/W2962930238","https://openalex.org/W2963277051","https://openalex.org/W2963289505","https://openalex.org/W2963308241","https://openalex.org/W2963590100","https://openalex.org/W2964138440","https://openalex.org/W2964237080","https://openalex.org/W6624823374","https://openalex.org/W6650265722","https://openalex.org/W6666314596","https://openalex.org/W6718092244","https://openalex.org/W6791858558","https://openalex.org/W6863994431"],"related_works":["https://openalex.org/W2966120739","https://openalex.org/W3212632099","https://openalex.org/W3012702477","https://openalex.org/W2890752237","https://openalex.org/W2988552651","https://openalex.org/W2951388782","https://openalex.org/W2951122980","https://openalex.org/W950880443","https://openalex.org/W3085123496","https://openalex.org/W2154633587","https://openalex.org/W2970469191","https://openalex.org/W3164908817","https://openalex.org/W2950229905","https://openalex.org/W3113875788","https://openalex.org/W2041509310","https://openalex.org/W3032597593","https://openalex.org/W3049505916","https://openalex.org/W2132632935","https://openalex.org/W2025257504","https://openalex.org/W3035463176"],"abstract_inverted_index":{"We":[0,50],"study":[1,104],"the":[2,10,14,26,47,61,89,97,109,122,135],"problem":[3],"of":[4,38,108,117],"inverse":[5],"reinforcement":[6],"learning":[7,48,136],"(IRL)":[8],"with":[9,127],"added":[11],"twist":[12],"that":[13,134],"learner":[15,43,112],"is":[16],"assisted":[17],"by":[18],"a":[19,32,57,83,93,105,128],"helpful":[20],"teacher.":[21,148],"More":[22],"formally,":[23],"we":[24,71,103],"tackle":[25],"following":[27],"algorithmic":[28],"question:":[29],"How":[30],"could":[31],"teacher":[33,58,84,98],"provide":[34],"an":[35,41,52,79,146],"informative":[36],"sequence":[37],"demonstrations":[39],"to":[40,44,145],"IRL":[42],"speed":[45],"up":[46,141],"process?":[49],"present":[51],"interactive":[53],"teaching":[54,73,119],"framework":[55],"where":[56,82,96],"adaptively":[59],"chooses":[60],"next":[62],"demonstration":[63],"based":[64],"on":[65],"learner's":[66,90],"current":[67],"policy.":[68],"In":[69],"particular,":[70],"design":[72],"algorithms":[74],"for":[75],"two":[76],"concrete":[77],"settings:":[78],"omniscient":[80,123],"setting":[81,95],"has":[85,99],"full":[86],"knowledge":[87],"about":[88],"dynamics":[91],"and":[92,113],"blackbox":[94],"minimal":[100],"knowledge.":[101],"Then,":[102],"sequential":[106],"variant":[107],"popular":[110],"MCE-IRL":[111],"prove":[114],"convergence":[115],"guarantees":[116],"our":[118],"algorithm":[120],"in":[121],"setting.":[124],"Extensive":[125],"experiments":[126],"car":[129],"driving":[130],"simulator":[131],"environment":[132],"show":[133],"progress":[137],"can":[138],"be":[139],"speeded":[140],"drastically":[142],"as":[143],"compared":[144],"uninformative":[147]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3}],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
