{"id":"https://openalex.org/W2803661326","doi":"https://doi.org/10.1609/aaai.v33i01.33017749","title":"Machine Teaching for Inverse Reinforcement Learning: Algorithms and Applications","display_name":"Machine Teaching for Inverse Reinforcement Learning: Algorithms and Applications","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2803661326","doi":"https://doi.org/10.1609/aaai.v33i01.33017749","mag":"2803661326"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33017749","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017749","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v33i01.33017749","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103065575","display_name":"Daniel S. Brown","orcid":"https://orcid.org/0000-0002-9570-1832"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel S. Brown","raw_affiliation_strings":["University of Texas at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["University of Texas at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1967,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83448742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"33","issue":"01","first_page":"7749","last_page":"7758"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7223954200744629},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6124466061592102},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5972392559051514},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5832814574241638},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5292291641235352},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4562860429286957},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.4459823966026306},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4402191936969757},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4347064197063446},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.4263346791267395},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.416251540184021},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1559147834777832}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7223954200744629},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6124466061592102},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5972392559051514},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5832814574241638},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5292291641235352},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4562860429286957},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.4459823966026306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4402191936969757},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4347064197063446},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.4263346791267395},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.416251540184021},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1559147834777832},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1609/aaai.v33i01.33017749","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017749","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1805.07687","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.07687","pdf_url":"https://arxiv.org/pdf/1805.07687","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2803661326","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1805.07687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1805.07687","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1805.07687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33017749","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017749","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7699999809265137,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G3609089545","display_name":null,"funder_award_id":"N00014-18-2243","funder_id":"https://openalex.org/F4320310620","funder_display_name":"University of Texas at Austin"},{"id":"https://openalex.org/G3770085111","display_name":"RI: Small: High Confidence, Efficient Learning Under Rich Task Specifications","funder_award_id":"1617639","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3881771821","display_name":null,"funder_award_id":"N00014-18-2243","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G5029475413","display_name":null,"funder_award_id":"IIS-1724157","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5910919248","display_name":"CAREER: Safe and Efficient Robot Learning from Demonstration in the Real World","funder_award_id":"1749204","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6928197803","display_name":"NRI: Collaborative Research: Scalable Robot Autonomy through Remote Operator Assistance and Lifelong Learning","funder_award_id":"1638107","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G829377574","display_name":null,"funder_award_id":"1724157","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310620","display_name":"University of Texas at Austin","ror":"https://ror.org/00hj54h04"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W102873864","https://openalex.org/W950880443","https://openalex.org/W1520461958","https://openalex.org/W1583136812","https://openalex.org/W1591675293","https://openalex.org/W1606056663","https://openalex.org/W1633675443","https://openalex.org/W1680189815","https://openalex.org/W1986014385","https://openalex.org/W1999874108","https://openalex.org/W2006912660","https://openalex.org/W2018623774","https://openalex.org/W2020764470","https://openalex.org/W2029828838","https://openalex.org/W2056354534","https://openalex.org/W2061562262","https://openalex.org/W2062525454","https://openalex.org/W2098774185","https://openalex.org/W2103104707","https://openalex.org/W2105156548","https://openalex.org/W2116442740","https://openalex.org/W2125299871","https://openalex.org/W2146250995","https://openalex.org/W2156163138","https://openalex.org/W2163037893","https://openalex.org/W2181849516","https://openalex.org/W2182055801","https://openalex.org/W2228499913","https://openalex.org/W2283935042","https://openalex.org/W2293844262","https://openalex.org/W2410842990","https://openalex.org/W2440926996","https://openalex.org/W2465040775","https://openalex.org/W2557026499","https://openalex.org/W2562989799","https://openalex.org/W2565370028","https://openalex.org/W2605076822","https://openalex.org/W2733870494","https://openalex.org/W2735318784","https://openalex.org/W2783793006","https://openalex.org/W2798750840","https://openalex.org/W2809134166","https://openalex.org/W2809461852","https://openalex.org/W2898621619","https://openalex.org/W2902909714","https://openalex.org/W2914331073","https://openalex.org/W2951122980","https://openalex.org/W2962717849","https://openalex.org/W2963099438","https://openalex.org/W2963208223","https://openalex.org/W2963289505","https://openalex.org/W2963508354","https://openalex.org/W2963670910","https://openalex.org/W3098853508","https://openalex.org/W4226065182","https://openalex.org/W6634753706","https://openalex.org/W6636827874","https://openalex.org/W6646884813","https://openalex.org/W6651768176","https://openalex.org/W6655688945","https://openalex.org/W6687434624","https://openalex.org/W6719439866","https://openalex.org/W6729863922","https://openalex.org/W6734760725","https://openalex.org/W6750827644","https://openalex.org/W6759166333"],"related_works":["https://openalex.org/W2963308241","https://openalex.org/W2287850282","https://openalex.org/W2945895484","https://openalex.org/W3174663468","https://openalex.org/W2486334580","https://openalex.org/W2281584511","https://openalex.org/W3126222328","https://openalex.org/W3003474222","https://openalex.org/W2061562262","https://openalex.org/W3202439292","https://openalex.org/W2898621619","https://openalex.org/W2944338412","https://openalex.org/W2785379783","https://openalex.org/W2756826236","https://openalex.org/W3093784762","https://openalex.org/W3205291599","https://openalex.org/W3176646154","https://openalex.org/W3162904728","https://openalex.org/W3034767611","https://openalex.org/W3133181182"],"abstract_inverted_index":{"Inverse":[0],"reinforcement":[1],"learning":[2],"(IRL)":[3],"infers":[4],"a":[5,38,55,93,125,136,143,156],"reward":[6,74],"function":[7],"from":[8,152],"demonstrations,":[9],"allowing":[10],"for":[11,52,87,106],"policy":[12,137],"improvement":[13],"and":[14,141],"generalization.":[15],"However,":[16],"despite":[17],"much":[18],"recent":[19],"interest":[20],"in":[21],"IRL,":[22],"little":[23],"work":[24,83],"has":[25],"been":[26],"done":[27],"to":[28,36,63,71,95,120,134],"understand":[29],"the":[30,45,60,65,73,78,96,108,129],"minimum":[31,66],"set":[32,97,109],"of":[33,47,68,77,110,131],"demonstrations":[34,51,69,154],"needed":[35,70,133],"teach":[37],"specific":[39],"sequential":[40,88],"decisionmaking":[41],"task.":[42],"We":[43,80,113],"formalize":[44],"problem":[46,58,99],"finding":[48],"maximally":[49],"informative":[50,153],"IRL":[53,140,145,158],"as":[54],"machine":[56,117],"teaching":[57,86,118],"where":[59],"goal":[61],"is":[62],"find":[64],"number":[67,130],"specify":[72],"equivalence":[75],"class":[76],"demonstrator.":[79],"extend":[81],"previous":[82],"on":[84,128],"algorithmic":[85],"decision-making":[89],"tasks":[90],"by":[91],"showing":[92],"reduction":[94],"cover":[98],"which":[100],"enables":[101],"an":[102],"efficient":[103],"approximation":[104],"algorithm":[105,119,146],"determining":[107],"maximallyinformative":[111],"demonstrations.":[112],"apply":[114],"our":[115],"proposed":[116],"two":[121],"novel":[122,144],"applications:":[123],"providing":[124],"lower":[126],"bound":[127],"queries":[132],"learn":[135,149],"using":[138],"active":[139],"developing":[142],"that":[147],"can":[148],"more":[150],"efficiently":[151],"than":[155],"standard":[157],"approach.":[159]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
