{"id":"https://openalex.org/W4416750880","doi":"https://doi.org/10.1109/iros60139.2025.11246127","title":"Embodied Instruction Following in Unknown Environments","display_name":"Embodied Instruction Following in Unknown Environments","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750880","doi":"https://doi.org/10.1109/iros60139.2025.11246127"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246127","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246127","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009794986","display_name":"Zhenyu Wu","orcid":"https://orcid.org/0000-0002-0133-2117"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhenyu Wu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Automation"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Automation","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389366","display_name":"Ziwei Wang","orcid":"https://orcid.org/0000-0001-9225-8495"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ziwei Wang","raw_affiliation_strings":["Nanyang Technological University,School of Electrical and Electronic Engineering"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,School of Electrical and Electronic Engineering","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100675886","display_name":"Xiuwei Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuwei Xu","raw_affiliation_strings":["Tsinghua University,Department of Automation"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Automation","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055596046","display_name":"Hang Yin","orcid":"https://orcid.org/0000-0002-5324-1962"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Yin","raw_affiliation_strings":["Tsinghua University,Department of Automation"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Automation","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084906179","display_name":"Yu Liang","orcid":"https://orcid.org/0000-0003-4259-6028"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinan Liang","raw_affiliation_strings":["Tsinghua University,Department of Automation"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Automation","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078675393","display_name":"Andy Ma","orcid":"https://orcid.org/0000-0003-3714-393X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Angyuan Ma","raw_affiliation_strings":["Tsinghua University,Department of Automation"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Automation","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460385","display_name":"Jiwen Lu","orcid":"https://orcid.org/0000-0002-6121-5529"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiwen Lu","raw_affiliation_strings":["Tsinghua University,Department of Automation"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Automation","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011536717","display_name":"Haibin Yan","orcid":"https://orcid.org/0000-0003-0811-6545"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibin Yan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Automation"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Automation","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5009794986"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":2.4362,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91258468,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"21825","last_page":"21832"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9527999758720398,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9527999758720398,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.008799999952316284,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.007699999958276749,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.8047999739646912},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6947000026702881},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.612500011920929},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5834000110626221},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5554999709129333},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5027999877929688},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.49540001153945923},{"id":"https://openalex.org/keywords/embodied-agent","display_name":"Embodied agent","score":0.482699990272522},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4754999876022339}],"concepts":[{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.8047999739646912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.746999979019165},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6947000026702881},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.659500002861023},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.612500011920929},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5834000110626221},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5554999709129333},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C103683099","wikidata":"https://www.wikidata.org/wiki/Q5370102","display_name":"Embodied agent","level":3,"score":0.482699990272522},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4754999876022339},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4478999972343445},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.367900013923645},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3386000096797943},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.31150001287460327},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.296099990606308},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.28839999437332153},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.2639000117778778},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246127","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246127","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W3034758614","https://openalex.org/W4200150166","https://openalex.org/W4214700710","https://openalex.org/W4312436794","https://openalex.org/W4385431115","https://openalex.org/W4389519523","https://openalex.org/W4389666115","https://openalex.org/W4390874280","https://openalex.org/W4413145337"],"related_works":[],"abstract_inverted_index":{"Enabling":[0],"embodied":[1,38,64,100],"agents":[2],"to":[3,13,36,84,91,133,168],"complete":[4],"complex":[5,70,216],"human":[6,24,151,164,217],"instructions":[7,25,218],"from":[8],"natural":[9],"language":[10,117],"is":[11,148,189],"crucial":[12],"autonomous":[14],"systems":[15],"in":[16,26,72,214,226],"household":[17],"services.":[18],"Conventional":[19],"methods":[20],"can":[21,209],"only":[22],"accomplish":[23,92],"the":[27,37,43,47,59,73,77,81,105,110,127,135,140,154,159,169,174,179,182,193,198],"known":[28,136,175,199],"environment":[29,49,83],"where":[30,76,139],"all":[31],"interactive":[32],"objects":[33,90],"are":[34,233],"provided":[35],"agent,":[39],"and":[40,109,145,173,197,223,231],"directly":[41],"deploying":[42],"existing":[44,89],"approaches":[45],"for":[46,69,150,163],"unknown":[48,74,82],"usually":[50],"generates":[51],"infeasible":[52],"plans":[53,87,162,196],"that":[54,206],"manipulate":[55],"non-existing":[56],"objects.":[57],"On":[58],"contrary,":[60],"we":[61,96,157],"propose":[62],"an":[63],"instruction":[65,101],"following":[66,102],"(EIF)":[67],"method":[68,208],"tasks":[71],"environment,":[75],"agent":[78],"efficiently":[79],"explores":[80],"generate":[85,158],"feasible":[86,160],"with":[88,114,129],"abstract":[93],"instructions.":[94],"Specifically,":[95],"build":[97],"a":[98,122],"hierarchical":[99],"framework":[103],"including":[104],"high-level":[106],"task":[107,143,155,170],"planner":[108],"low-level":[111],"exploration":[112,147,180],"controller":[113],"multimodal":[115],"large":[116,227],"models.":[118],"We":[119],"then":[120],"construct":[121],"semantic":[123],"representation":[124],"map":[125],"of":[126,142],"scene":[128,146],"dynamic":[130],"region":[131],"attention":[132],"demonstrate":[134,205],"visual":[137,176,200],"clues,":[138],"goal":[141,165],"planning":[144],"aligned":[149],"instruction.":[152],"For":[153,178],"planner,":[156],"step-by-step":[161],"accomplishment":[166],"according":[167],"completion":[171],"process":[172],"clues.":[177,201],"controller,":[181],"optimal":[183],"navigation":[184],"or":[185],"object":[186],"interaction":[187],"policy":[188],"predicted":[190],"based":[191],"on":[192],"generated":[194],"step-wise":[195],"The":[202],"experimental":[203],"results":[204],"our":[207],"achieve":[210],"45.09%":[211],"success":[212],"rate":[213],"204":[215],"such":[219],"as":[220],"making":[221],"breakfast":[222],"tidying":[224],"rooms":[225],"house-level":[228],"scenes.":[229],"Code":[230],"supplementary":[232],"available":[234],"at":[235],"https://gary3410.github.io/eif_unknown/.":[236]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
