{"id":"https://openalex.org/W2742096003","doi":"https://doi.org/10.1017/s0269888918000206","title":"Leveraging Human Knowledge in Tabular Reinforcement Learning: A Study of Human Subjects","display_name":"Leveraging Human Knowledge in Tabular Reinforcement Learning: A Study of Human Subjects","publication_year":2017,"publication_date":"2017-07-28","ids":{"openalex":"https://openalex.org/W2742096003","doi":"https://doi.org/10.1017/s0269888918000206","mag":"2742096003"},"language":"en","primary_location":{"id":"doi:10.1017/s0269888918000206","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888918000206","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2017/0534.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082947685","display_name":"Ariel Rosenfeld","orcid":"https://orcid.org/0000-0002-3230-3060"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Ariel Rosenfeld","raw_affiliation_strings":["Department of Management, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat Gan, Israel; e-mail:"],"affiliations":[{"raw_affiliation_string":"Department of Management, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat Gan, Israel; e-mail:","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070914351","display_name":"Matthew E. Taylor","orcid":"https://orcid.org/0000-0001-8946-0211"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]},{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Moshe Cohen","raw_affiliation_strings":["Department of Computer Science, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat-Gan, Israel e-mail:","Bar-Ilan University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat-Gan, Israel e-mail:","institution_ids":[]},{"raw_affiliation_string":"Bar-Ilan University","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103213461","display_name":"Sarit Kraus","orcid":"https://orcid.org/0000-0003-4672-623X"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]},{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Matthew E. Taylor","raw_affiliation_strings":["Department of Computer Science, Washington State University, Pullman, WA 99164, USA e-mail:","washington state university"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Washington State University, Pullman, WA 99164, USA e-mail:","institution_ids":[]},{"raw_affiliation_string":"washington state university","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sarit Kraus","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sarit Kraus","raw_affiliation_strings":["Department of Computer Science, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat-Gan, Israel e-mail:","Bar-Ilan University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Bar-Ilan University, Max and Anna Webb Street, 5290002 Ramat-Gan, Israel e-mail:","institution_ids":[]},{"raw_affiliation_string":"Bar-Ilan University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082947685"],"corresponding_institution_ids":["https://openalex.org/I13955877"],"apc_list":null,"apc_paid":null,"fwci":1.455,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.8683657,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"33","issue":null,"first_page":"3823","last_page":"3830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9580000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9580000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9165999889373779,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9002000093460083,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7814900875091553},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7553988695144653},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6379059553146362},{"id":"https://openalex.org/keywords/sass","display_name":"Sass","score":0.5931688547134399},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5754291415214539},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5275207161903381},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.475887656211853},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4707178473472595},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.46848949790000916},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.42317190766334534},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.40655797719955444},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07432025671005249},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.07318902015686035}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7814900875091553},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7553988695144653},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6379059553146362},{"id":"https://openalex.org/C2778917941","wikidata":"https://www.wikidata.org/wiki/Q1572865","display_name":"Sass","level":2,"score":0.5931688547134399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5754291415214539},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5275207161903381},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.475887656211853},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4707178473472595},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.46848949790000916},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.42317190766334534},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40655797719955444},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07432025671005249},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.07318902015686035},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1017/s0269888918000206","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888918000206","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"},{"id":"doi:10.24963/ijcai.2017/534","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/534","pdf_url":"https://www.ijcai.org/proceedings/2017/0534.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1805.05769","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.05769","pdf_url":"https://arxiv.org/pdf/1805.05769","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2742096003","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1805.05769","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1805.05769","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1805.05769","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2017/534","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/534","pdf_url":"https://www.ijcai.org/proceedings/2017/0534.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2754051857","display_name":null,"funder_award_id":"NNX16CD07C","funder_id":"https://openalex.org/F4320306101","funder_display_name":"National Aeronautics and Space Administration"},{"id":"https://openalex.org/G2792800478","display_name":null,"funder_award_id":"2014-67021","funder_id":"https://openalex.org/F4320306114","funder_display_name":"U.S. Department of Agriculture"},{"id":"https://openalex.org/G285167939","display_name":null,"funder_award_id":"IIS-1149917","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3040850200","display_name":"CAREER: A Multiagent Teacher/Student Framework for Sequential Decision Making Tasks","funder_award_id":"1149917","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3405304517","display_name":"EAGER:   Income Learning:   A New Model for Behavior-Analysis-Inspired Learning from Human Feedback","funder_award_id":"1643614","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5786867099","display_name":null,"funder_award_id":"IIS-1643614","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8109057036","display_name":null,"funder_award_id":"2014-67021-22174","funder_id":"https://openalex.org/F4320306114","funder_display_name":"U.S. Department of Agriculture"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306101","display_name":"National Aeronautics and Space Administration","ror":"https://ror.org/027ka1x80"},{"id":"https://openalex.org/F4320306114","display_name":"U.S. Department of Agriculture","ror":"https://ror.org/01na82s61"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2742096003.pdf","grobid_xml":"https://content.openalex.org/works/W2742096003.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W96529184","https://openalex.org/W130529780","https://openalex.org/W139075666","https://openalex.org/W145446370","https://openalex.org/W183249136","https://openalex.org/W1499408472","https://openalex.org/W1542941925","https://openalex.org/W1570448133","https://openalex.org/W1585603966","https://openalex.org/W1604363237","https://openalex.org/W1626155273","https://openalex.org/W1777239053","https://openalex.org/W1997303620","https://openalex.org/W2008809493","https://openalex.org/W2089415692","https://openalex.org/W2097797606","https://openalex.org/W2103048296","https://openalex.org/W2115668428","https://openalex.org/W2116157560","https://openalex.org/W2121863487","https://openalex.org/W2131831090","https://openalex.org/W2145339207","https://openalex.org/W2156578004","https://openalex.org/W2157289187","https://openalex.org/W2158304715","https://openalex.org/W2169659168","https://openalex.org/W2198041288","https://openalex.org/W2262257077","https://openalex.org/W2397581010","https://openalex.org/W2460411341","https://openalex.org/W2491675558","https://openalex.org/W2581945288","https://openalex.org/W2621262673","https://openalex.org/W2742096003","https://openalex.org/W2792966959","https://openalex.org/W2904396358","https://openalex.org/W2984187004","https://openalex.org/W3011120880","https://openalex.org/W4234438384","https://openalex.org/W6675223484","https://openalex.org/W6692458258","https://openalex.org/W6727405266"],"related_works":["https://openalex.org/W2962710194","https://openalex.org/W2103064945","https://openalex.org/W2964263543","https://openalex.org/W2963025296","https://openalex.org/W3005888089","https://openalex.org/W3167658443","https://openalex.org/W2201750637","https://openalex.org/W2888816922","https://openalex.org/W2968268642","https://openalex.org/W3025362081","https://openalex.org/W2945719686","https://openalex.org/W2964855005","https://openalex.org/W3098338892","https://openalex.org/W3173555766","https://openalex.org/W3203056473","https://openalex.org/W3037114224","https://openalex.org/W2272929109","https://openalex.org/W2344013593","https://openalex.org/W2963794592","https://openalex.org/W2949059942"],"abstract_inverted_index":{"Abstract":[0],"Reinforcement":[1],"learning":[2],"(RL)":[3],"can":[4,171],"be":[5,146,172],"extremely":[6],"effective":[7],"in":[8,40,66,107,134],"solving":[9],"complex,":[10],"real-world":[11],"problems.":[12],"However,":[13,162],"injecting":[14,61],"human":[15,29,34,62,70,114,119,127,193],"knowledge":[16,63,76],"into":[17],"an":[18],"RL":[19,47],"agent":[20],"may":[21],"require":[22],"extensive":[23],"effort":[24,195],"and":[25,43,77,156,174,180],"expertise":[26],"on":[27,102,125],"the":[28,41,81,103,108,126,139,147],"designer\u2019s":[30],"part.":[31],"To":[32],"date,":[33],"factors":[35,128],"are":[36,64],"generally":[37],"not":[38],"considered":[39],"development":[42],"evaluation":[44,84],"of":[45,72,75,85,105,117],"possible":[46],"approaches.":[48],"In":[49],"this":[50,113],"article,":[51],"we":[52,121,163],"set":[53],"out":[54],"to":[55,145,158,185],"investigate":[56],"how":[57],"different":[58],"methods":[59],"for":[60,151],"applied,":[65],"practice":[67],",":[68],"by":[69],"designers":[71],"varying":[73],"levels":[74],"skill.":[78],"We":[79,136],"perform":[80],"first":[82],"empirical":[83],"several":[86],"methods,":[87],"including":[88],"a":[89,131,182,188],"newly":[90],"proposed":[91,168],"method":[92,150,169,190],"named":[93],"State":[94],"Action":[95],"Similarity":[96],"Solutions":[97],"(SASS)":[98],"which":[99],"is":[100],"based":[101],"notion":[104],"similarities":[106],"agent\u2019s":[109],"state\u2013action":[110],"space.":[111],"Through":[112],"study,":[115],"consisting":[116],"51":[118],"participants,":[120],"shed":[122],"new":[123],"light":[124],"that":[129,138,166],"play":[130],"key":[132],"role":[133],"RL.":[135,161],"find":[137,165],"classical":[140],"reward":[141,178],"shaping":[142],"technique":[143],"seems":[144],"most":[148,152],"natural":[149],"designers,":[153],"both":[154],"expert":[155],"non-expert,":[157],"speed":[159],"up":[160],"further":[164],"our":[167],"SASS":[170],"effectively":[173],"efficiently":[175],"combined":[176],"with":[177,191],"shaping,":[179],"provides":[181],"beneficial":[183],"alternative":[184],"using":[186],"only":[187],"single-speedup":[189],"minimal":[192],"designer":[194],"overhead.":[196]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
