{"id":"https://openalex.org/W3009356492","doi":"https://doi.org/10.1145/3319502.3374824","title":"Teaching a Robot Tasks of Arbitrary Complexity via Human Feedback","display_name":"Teaching a Robot Tasks of Arbitrary Complexity via Human Feedback","publication_year":2020,"publication_date":"2020-03-07","ids":{"openalex":"https://openalex.org/W3009356492","doi":"https://doi.org/10.1145/3319502.3374824","mag":"3009356492"},"language":"en","primary_location":{"id":"doi:10.1145/3319502.3374824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3319502.3374824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100325286","display_name":"Guan Wang","orcid":"https://orcid.org/0000-0002-8126-4440"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Guan Wang","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033455504","display_name":"Carl Trimbach","orcid":null},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carl Trimbach","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010583468","display_name":"Jun Ki Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Ki Lee","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083477814","display_name":"Mark K. Ho","orcid":"https://orcid.org/0000-0002-1454-4768"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark K. Ho","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009722403","display_name":"Michael L. Littman","orcid":"https://orcid.org/0000-0002-5596-1840"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael L. Littman","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100325286"],"corresponding_institution_ids":["https://openalex.org/I27804330"],"apc_list":null,"apc_paid":null,"fwci":0.6628,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.74869613,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"649","last_page":"657"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7980098724365234},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7813253998756409},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6951693296432495},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6878657341003418},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6440079808235168},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5535778999328613},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5383478403091431},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5320782661437988},{"id":"https://openalex.org/keywords/carry","display_name":"Carry (investment)","score":0.4843112528324127},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4709884822368622},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4490516781806946},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4195597171783447},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4134013056755066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7980098724365234},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7813253998756409},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6951693296432495},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6878657341003418},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6440079808235168},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5535778999328613},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5383478403091431},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5320782661437988},{"id":"https://openalex.org/C2776299755","wikidata":"https://www.wikidata.org/wiki/Q432449","display_name":"Carry (investment)","level":2,"score":0.4843112528324127},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4709884822368622},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4490516781806946},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4195597171783447},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4134013056755066},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3319502.3374824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3319502.3374824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W122021961","https://openalex.org/W745775011","https://openalex.org/W1541730457","https://openalex.org/W1576818901","https://openalex.org/W1589747210","https://openalex.org/W1970543914","https://openalex.org/W1999874108","https://openalex.org/W2027106130","https://openalex.org/W2061562262","https://openalex.org/W2098774185","https://openalex.org/W2121110499","https://openalex.org/W2121863487","https://openalex.org/W2151958719","https://openalex.org/W2156869222","https://openalex.org/W2181849516","https://openalex.org/W2261436455","https://openalex.org/W2312609093","https://openalex.org/W2346373674","https://openalex.org/W2395263189","https://openalex.org/W2410842990","https://openalex.org/W2492375137","https://openalex.org/W2539402368","https://openalex.org/W2580300496","https://openalex.org/W2592535843","https://openalex.org/W2597613574","https://openalex.org/W2599276130","https://openalex.org/W2605514557","https://openalex.org/W2613939043","https://openalex.org/W2621205314","https://openalex.org/W2807161818","https://openalex.org/W2885467852","https://openalex.org/W2889713213","https://openalex.org/W2914941842","https://openalex.org/W2964111087","https://openalex.org/W2964263543","https://openalex.org/W2965569070","https://openalex.org/W2998674842","https://openalex.org/W4230725350"],"related_works":["https://openalex.org/W2807720855","https://openalex.org/W4229867111","https://openalex.org/W1488770949","https://openalex.org/W2793180915","https://openalex.org/W2386495035","https://openalex.org/W1582047458","https://openalex.org/W2071291085","https://openalex.org/W2113094707","https://openalex.org/W1514266315","https://openalex.org/W2967461658"],"abstract_inverted_index":{"This":[0],"paper":[1],"addresses":[2],"the":[3,38,61,73,87],"problem":[4],"of":[5,14,35,60,75,83,110,115],"training":[6,56,113],"a":[7,33,55,81,97],"robot":[8,39],"to":[9,40,48],"carry":[10],"out":[11],"temporal":[12],"tasks":[13,43,95],"arbitrary":[15],"complexity":[16],"via":[17],"evaluative":[18],"human":[19,91],"feedback":[20,138],"that":[21,85,89,99,131],"can":[22,65,93],"be":[23],"inaccurate.":[24],"A":[25],"key":[26],"idea":[27],"explored":[28],"in":[29,72,96],"our":[30,103,116,121,127,132],"work":[31],"is":[32,100],"kind":[34],"curriculum":[36],"learning---training":[37],"master":[41],"simple":[42],"and":[44,67,126],"then":[45],"building":[46],"up":[47],"more":[49,107],"complex":[50],"tasks.":[51],"We":[52,78,119],"show":[53],"how":[54],"procedure,":[57],"using":[58],"knowledge":[59],"formal":[62],"task":[63,70],"representation,":[64],"decompose":[66,94],"train":[68],"any":[69],"efficiently":[71],"size":[74],"its":[76],"representation.":[77],"further":[79],"provide":[80],"set":[82],"experiments":[84],"support":[86],"claim":[88],"non-expert":[90],"trainers":[92],"way":[98],"consistent":[101],"with":[102,106,123],"theoretical":[104],"results,":[105],"than":[108],"half":[109],"participants":[111],"successfully":[112],"all":[114],"experimental":[117,128],"missions.":[118],"compared":[120],"algorithm":[122],"existing":[124],"approaches":[125],"results":[129],"suggest":[130],"method":[133],"outperforms":[134],"alternatives,":[135],"especially":[136],"when":[137],"contains":[139],"mistakes.":[140]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
