{"id":"https://openalex.org/W2920768993","doi":"https://doi.org/10.24963/ijcai.2019/331","title":"Using Natural Language for Reward Shaping in Reinforcement Learning","display_name":"Using Natural Language for Reward Shaping in Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-28","ids":{"openalex":"https://openalex.org/W2920768993","doi":"https://doi.org/10.24963/ijcai.2019/331","mag":"2920768993"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2019/331","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/331","pdf_url":"https://www.ijcai.org/proceedings/2019/0331.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2019/0331.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072713291","display_name":"Prasoon Goyal","orcid":"https://orcid.org/0000-0003-3121-1241"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Prasoon Goyal","raw_affiliation_strings":["The University of Texas at Austin","University of Texas at Austin"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["The University of Texas at Austin","the University of Texas at, Austin"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"the University of Texas at, Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008715111","display_name":"Raymond J. Mooney","orcid":"https://orcid.org/0000-0002-4504-0490"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raymond J. Mooney","raw_affiliation_strings":["The University of Texas at Austin","the University of Texas at, Austin"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"the University of Texas at, Austin","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072713291"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":3.6127,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.94344611,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2385","last_page":"2391"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8885947465896606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7498441338539124},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6708338260650635},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6619535684585571},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6034948825836182},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5629891753196716},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5474140048027039},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.4962926506996155},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4664299786090851},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4612841010093689},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3242228627204895},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17117926478385925},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08522140979766846},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0569230318069458},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.05547323822975159}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8885947465896606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7498441338539124},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6708338260650635},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6619535684585571},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6034948825836182},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5629891753196716},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5474140048027039},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.4962926506996155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4664299786090851},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4612841010093689},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3242228627204895},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17117926478385925},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08522140979766846},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0569230318069458},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.05547323822975159},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2019/331","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/331","pdf_url":"https://www.ijcai.org/proceedings/2019/0331.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1903.02020","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.02020","pdf_url":"https://arxiv.org/pdf/1903.02020","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2920768993","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1903.02020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1903.02020","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1903.02020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2019/331","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/331","pdf_url":"https://www.ijcai.org/proceedings/2019/0331.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G187671969","display_name":null,"funder_award_id":"IIS-1637736","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8653745736","display_name":"NRI: Robots that Learn to Communicate through Natural Human Dialog","funder_award_id":"1637736","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2920768993.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1777239053","https://openalex.org/W2109462874","https://openalex.org/W2140584963","https://openalex.org/W2250539671","https://openalex.org/W2410983263","https://openalex.org/W2609374097","https://openalex.org/W2611884151","https://openalex.org/W2612675303","https://openalex.org/W2619240030","https://openalex.org/W2736601468","https://openalex.org/W2741122588","https://openalex.org/W2766371743","https://openalex.org/W2805465728","https://openalex.org/W2890809352","https://openalex.org/W2900790749","https://openalex.org/W2950635152","https://openalex.org/W2963918774","https://openalex.org/W3098581361","https://openalex.org/W3103780890","https://openalex.org/W3110909889","https://openalex.org/W3121854931"],"related_works":["https://openalex.org/W2964654516","https://openalex.org/W2118781169","https://openalex.org/W2145339207","https://openalex.org/W2950172727","https://openalex.org/W2986106408","https://openalex.org/W1130790960","https://openalex.org/W3131041265","https://openalex.org/W2897673281","https://openalex.org/W158183001","https://openalex.org/W567721252","https://openalex.org/W2964121744","https://openalex.org/W1777239053","https://openalex.org/W3154048159","https://openalex.org/W2620451350","https://openalex.org/W2079247031","https://openalex.org/W2998135952","https://openalex.org/W3144850808","https://openalex.org/W2400719195","https://openalex.org/W2513173501","https://openalex.org/W2911718261"],"abstract_inverted_index":{"Recent":[0],"reinforcement":[1,93],"learning":[2,94,149],"(RL)":[3],"approaches":[4],"have":[5],"shown":[6],"strong":[7],"performance":[8],"in":[9,110],"complex":[10],"domains,":[11],"such":[12,54],"as":[13],"Atari":[14,103],"games,":[15],"but":[16],"are":[17],"highly":[18],"sample":[19],"inefficient.":[20],"A":[21],"common":[22],"approach":[23],"to":[24,32,68,82,148],"reduce":[25],"interaction":[26],"time":[27],"with":[28,98,129],"the":[29,44,51,102,124,130,138],"environment":[30],"is":[31],"use":[33,64],"reward":[34,40,70],"shaping,":[35],"which":[36],"involves":[37],"carefully":[38],"designing":[39],"functions":[41],"that":[42,76,85,122],"provide":[43],"agent":[45],"intermediate":[46,83],"rewards":[47,55,134],"for":[48,123],"progress":[49],"towards":[50],"goal.":[52],"Designing":[53],"remains":[56],"a":[57,74,107,115],"challenge,":[58],"though.":[59],"In":[60],"this":[61],"work,":[62],"we":[63],"natural":[65,79],"language":[66,80],"instructions":[67,81],"perform":[69],"shaping.":[71],"We":[72,96],"propose":[73],"framework":[75],"maps":[77],"free-form":[78],"rewards,":[84],"can":[86,135],"seamlessly":[87],"be":[88],"integrated":[89],"into":[90],"any":[91],"standard":[92],"algorithm.":[95],"experiment":[97],"Montezuma's":[99],"Revenge":[100],"from":[101],"video":[104],"games":[105],"domain,":[106],"popular":[108],"benchmark":[109],"RL.":[111],"Our":[112],"experiments":[113],"on":[114],"diverse":[116],"set":[117],"of":[118,127],"15":[119],"tasks":[120],"demonstrate":[121],"same":[125],"number":[126],"interactions":[128],"environment,":[131],"using":[132],"language-based":[133],"successfully":[136],"complete":[137],"task":[139],"60%":[140],"more":[141],"often,":[142],"averaged":[143],"across":[144],"all":[145],"tasks,":[146],"compared":[147],"without":[150],"language.":[151]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
