{"id":"https://openalex.org/W3096495169","doi":"https://doi.org/10.24963/ijcai.2021/306","title":"Reinforcement Learning for Sparse-Reward Object-Interaction Tasks in a First-person Simulated 3D Environment","display_name":"Reinforcement Learning for Sparse-Reward Object-Interaction Tasks in a First-person Simulated 3D Environment","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3096495169","doi":"https://doi.org/10.24963/ijcai.2021/306","mag":"3096495169"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/306","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/306","pdf_url":"https://www.ijcai.org/proceedings/2021/0306.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0306.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065654942","display_name":"Wilka Carvalho","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wilka Carvalho","raw_affiliation_strings":["University of Michigan","\u2020University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"\u2020University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018923111","display_name":"Anthony Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anthony Liang","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033571780","display_name":"Kimin Lee","orcid":"https://orcid.org/0000-0001-9017-3084"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kimin Lee","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019504524","display_name":"Sungryull Sohn","orcid":"https://orcid.org/0000-0001-7733-4293"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sungryull Sohn","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108652283","display_name":"Honglak Lee","orcid":"https://orcid.org/0000-0002-1279-0068"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Honglak Lee","raw_affiliation_strings":["LG AI Research","University of Michigan"],"affiliations":[{"raw_affiliation_string":"LG AI Research","institution_ids":[]},{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077109450","display_name":"Richard L. Lewis","orcid":"https://orcid.org/0000-0001-6403-489X"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Lewis","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065366930","display_name":"Satinder Singh","orcid":"https://orcid.org/0000-0002-2736-7641"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Satinder Singh","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065654942"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00942031,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2219","last_page":"2226"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.946399986743927,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7787752151489258},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.7306846976280212},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6810858249664307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6240427494049072},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5659946203231812},{"id":"https://openalex.org/keywords/learning-object","display_name":"Learning object","score":0.5434035062789917},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.47035884857177734},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3991504907608032},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3536505103111267}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7787752151489258},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.7306846976280212},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6810858249664307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6240427494049072},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5659946203231812},{"id":"https://openalex.org/C2779542340","wikidata":"https://www.wikidata.org/wiki/Q1062461","display_name":"Learning object","level":2,"score":0.5434035062789917},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.47035884857177734},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3991504907608032},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3536505103111267},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.24963/ijcai.2021/306","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/306","pdf_url":"https://www.ijcai.org/proceedings/2021/0306.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"mag:3096495169","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2010.15195.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2010.15195","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2010.15195","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/306","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/306","pdf_url":"https://www.ijcai.org/proceedings/2021/0306.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1171700966","display_name":null,"funder_award_id":"NSF CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G592113747","display_name":null,"funder_award_id":"1453651","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3096495169.pdf","grobid_xml":"https://content.openalex.org/works/W3096495169.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W2133001472","https://openalex.org/W2138621090","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2157617585","https://openalex.org/W2210408922","https://openalex.org/W2555897561","https://openalex.org/W2620290674","https://openalex.org/W2753738274","https://openalex.org/W2776202271","https://openalex.org/W2807340089","https://openalex.org/W2944892105","https://openalex.org/W2952355813","https://openalex.org/W2962684798","https://openalex.org/W2962841471","https://openalex.org/W2963403868","https://openalex.org/W2963447367","https://openalex.org/W2970764142","https://openalex.org/W2982320652","https://openalex.org/W2991222810","https://openalex.org/W3028924880","https://openalex.org/W3029095206","https://openalex.org/W3031673669","https://openalex.org/W3034758614"],"related_works":["https://openalex.org/W3189061934","https://openalex.org/W3088302932","https://openalex.org/W1604959332","https://openalex.org/W2934961104","https://openalex.org/W3104898494","https://openalex.org/W2788781499","https://openalex.org/W3122543654","https://openalex.org/W2943342199","https://openalex.org/W2510924756","https://openalex.org/W2805590920","https://openalex.org/W3181648466","https://openalex.org/W2889681745","https://openalex.org/W2963619650","https://openalex.org/W3134527491","https://openalex.org/W2394794665","https://openalex.org/W2890179775","https://openalex.org/W3100466381","https://openalex.org/W3186035148","https://openalex.org/W2768815665","https://openalex.org/W2105741987"],"abstract_inverted_index":{"Learning":[0],"how":[1],"to":[2,27,43,54,76,129,157,189],"execute":[3],"complex":[4],"tasks":[5],"involving":[6],"multiple":[7],"objects":[8,23,177],"in":[9,137,178,192],"a":[10,35,70,99,145],"3D":[11,139],"world":[12],"is":[13,17,171],"challenging":[14,42,148],"when":[15],"there":[16],"no":[18],"ground-truth":[19,174],"information":[20,175],"about":[21,176],"the":[22,45,50,56,123,138,163,179,190],"or":[24],"any":[25],"demonstration":[26],"learn":[28,44,131],"from.":[29],"When":[30],"an":[31,64,94,166,168],"agent":[32,169,185],"only":[33],"receives":[34,122],"signal":[36,126],"from":[37],"task-completion,":[38],"this":[39,59,88],"makes":[40],"it":[41,98,104,127],"object-representations":[46],"which":[47],"support":[48],"learning":[49,63,91,196],"correct":[51],"object-interactions":[52],"needed":[53],"complete":[55],"task.":[57],"In":[58],"work,":[60],"we":[61],"formulate":[62],"attentive":[65],"object":[66],"dynamics":[67],"model":[68],"as":[69],"classification":[71],"problem,":[72],"using":[73],"random":[74],"object-images":[75],"define":[77],"incorrect":[78],"labels":[79],"for":[80],"our":[81,115,154,184],"object-dynamics":[82],"model.":[83],"We":[84,134,152,181],"show":[85],"empirically":[86],"that":[87,92,170,183],"enables":[89],"object-representation":[90],"captures":[93],"object's":[95],"category":[96],"(is":[97,103,108],"toaster?),":[100],"its":[101],"properties":[102],"on?),":[105],"and":[106,161,198],"object-relations":[107],"something":[109],"inside":[110],"of":[111,147,165,194],"it?).":[112],"With":[113],"this,":[114],"core":[116],"learner":[117],"(a":[118],"relational":[119],"RL":[120],"agent)":[121],"dense":[124],"training":[125],"needs":[128],"rapidly":[130],"object-interaction":[132],"tasks.":[133,151],"demonstrate":[135],"results":[136],"AI2Thor":[140],"simulated":[141],"kitchen":[142],"environment":[143],"with":[144,173],"range":[146],"food":[149],"preparation":[150],"compare":[153],"method's":[155],"performance":[156,164,187],"several":[158],"related":[159],"approaches":[160],"against":[162],"oracle:":[167],"supplied":[172],"scene.":[180],"find":[182],"achieves":[186],"closest":[188],"oracle":[191],"terms":[193],"both":[195],"speed":[197],"maximum":[199],"success":[200],"rate.":[201]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
