{"id":"https://openalex.org/W3168377482","doi":"https://doi.org/10.24963/ijcai.2021/128","title":"Look Wide and Interpret Twice: Improving Performance on Interactive Instruction-following Tasks","display_name":"Look Wide and Interpret Twice: Improving Performance on Interactive Instruction-following Tasks","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3168377482","doi":"https://doi.org/10.24963/ijcai.2021/128","mag":"3168377482"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/128","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/128","pdf_url":"https://www.ijcai.org/proceedings/2021/0128.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0128.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103235650","display_name":"Van-Quang Nguyen","orcid":"https://orcid.org/0009-0007-1831-5763"},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Van-Quang Nguyen","raw_affiliation_strings":["Graduate School of Information Sciences, Tohoku University","Tohoku University (),"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Information Sciences, Tohoku University","institution_ids":["https://openalex.org/I201537933"]},{"raw_affiliation_string":"Tohoku University (),","institution_ids":["https://openalex.org/I201537933"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042810984","display_name":"Masanori Suganuma","orcid":"https://orcid.org/0000-0002-1469-9663"},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masanori Suganuma","raw_affiliation_strings":["RIKEN Center for AIP/ Graduate School of Information Sciences, Tohoku University","Tohoku University (),"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RIKEN Center for AIP/ Graduate School of Information Sciences, Tohoku University","institution_ids":["https://openalex.org/I201537933"]},{"raw_affiliation_string":"Tohoku University (),","institution_ids":["https://openalex.org/I201537933"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009259465","display_name":"Takayuki Okatani","orcid":"https://orcid.org/0000-0001-9222-763X"},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takayuki Okatani","raw_affiliation_strings":["Graduate School of Information Sciences, Tohoku University/ RIKEN Center for AIP","Tohoku University (),"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Information Sciences, Tohoku University/ RIKEN Center for AIP","institution_ids":["https://openalex.org/I201537933"]},{"raw_affiliation_string":"Tohoku University (),","institution_ids":["https://openalex.org/I201537933"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103235650"],"corresponding_institution_ids":["https://openalex.org/I201537933"],"apc_list":null,"apc_paid":null,"fwci":0.194,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.47693673,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"923","last_page":"930"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.846062183380127},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7364276051521301},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.7098029851913452},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6564860343933105},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6266562342643738},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.621016263961792},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.5943512320518494},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.554690957069397},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4941737949848175},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42606133222579956},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.42130303382873535},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3791888952255249}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.846062183380127},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7364276051521301},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.7098029851913452},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6564860343933105},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6266562342643738},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.621016263961792},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.5943512320518494},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.554690957069397},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4941737949848175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42606133222579956},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.42130303382873535},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3791888952255249},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2021/128","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/128","pdf_url":"https://www.ijcai.org/proceedings/2021/0128.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.00596","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.00596","pdf_url":"https://arxiv.org/pdf/2106.00596","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3168377482","is_oa":true,"landing_page_url":"http://arxiv.org/pdf/2106.00596.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.00596","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.00596","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/128","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/128","pdf_url":"https://www.ijcai.org/proceedings/2021/0128.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8299999833106995}],"awards":[{"id":"https://openalex.org/G6852524107","display_name":null,"funder_award_id":"20H05952","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3168377482.pdf","grobid_xml":"https://content.openalex.org/works/W3168377482.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W2095705004","https://openalex.org/W2250539671","https://openalex.org/W2362143032","https://openalex.org/W2618235849","https://openalex.org/W2745990901","https://openalex.org/W2755286543","https://openalex.org/W2772262724","https://openalex.org/W2774005037","https://openalex.org/W2776202271","https://openalex.org/W2783375473","https://openalex.org/W2799002257","https://openalex.org/W2805984364","https://openalex.org/W2810346659","https://openalex.org/W2884565639","https://openalex.org/W2900790749","https://openalex.org/W2903172725","https://openalex.org/W2903932979","https://openalex.org/W2907729764","https://openalex.org/W2909303996","https://openalex.org/W2926977875","https://openalex.org/W2930283066","https://openalex.org/W2962684798","https://openalex.org/W2963088756","https://openalex.org/W2963150697","https://openalex.org/W2963447367","https://openalex.org/W2963575117","https://openalex.org/W2963689319","https://openalex.org/W2963726321","https://openalex.org/W2963871073","https://openalex.org/W2964339842","https://openalex.org/W2964935470","https://openalex.org/W2967186499","https://openalex.org/W2970971581","https://openalex.org/W2971077754","https://openalex.org/W2979727876","https://openalex.org/W2990021239","https://openalex.org/W2993086250","https://openalex.org/W2997886377","https://openalex.org/W3014266552","https://openalex.org/W3023306062","https://openalex.org/W3029418112","https://openalex.org/W3034758614","https://openalex.org/W3043450547","https://openalex.org/W3092516542","https://openalex.org/W3094399892","https://openalex.org/W3107092117","https://openalex.org/W3109085430","https://openalex.org/W3112356180","https://openalex.org/W6691431627","https://openalex.org/W6746518932","https://openalex.org/W6752333115","https://openalex.org/W6759920914","https://openalex.org/W6765527290","https://openalex.org/W6863631769","https://openalex.org/W6863994431","https://openalex.org/W6864014924"],"related_works":["https://openalex.org/W3188987547","https://openalex.org/W3173781631","https://openalex.org/W3205055432","https://openalex.org/W2890902815","https://openalex.org/W3010436080","https://openalex.org/W3021016503","https://openalex.org/W3107094551","https://openalex.org/W2998008599","https://openalex.org/W3206919976","https://openalex.org/W2905039743","https://openalex.org/W2918407751","https://openalex.org/W2982482202","https://openalex.org/W2988536330","https://openalex.org/W2571312264","https://openalex.org/W2476698163","https://openalex.org/W2919041517","https://openalex.org/W3133815862","https://openalex.org/W3093006710","https://openalex.org/W3159526778","https://openalex.org/W2807666747"],"abstract_inverted_index":{"There":[0],"is":[1,63,73,125,202],"a":[2,15,35,50,59,66,74,94,198],"growing":[3],"interest":[4],"in":[5,8,127],"the":[6,31,39,55,78,102,105,110,120,128,135,139,150,162,168,179,182,190],"community":[7],"making":[9],"an":[10,21,87,114,117],"embodied":[11],"AI":[12],"agent":[13],"perform":[14],"complicated":[16],"task":[17],"while":[18],"interacting":[19],"with":[20,104,197,207],"environment":[22,151],"following":[23],"natural":[24],"language":[25],"directives.":[26],"Recent":[27],"studies":[28],"have":[29],"tackled":[30],"problem":[32],"using":[33,90],"ALFRED,":[34],"well-designed":[36],"dataset":[37],"for":[38,173],"task,":[40],"but":[41],"achieved":[42],"only":[43],"very":[44],"low":[45],"accuracy.":[46],"This":[47,165],"paper":[48],"proposes":[49],"new":[51,70],"method,":[52],"which":[53,201],"outperforms":[54],"previous":[56],"methods":[57],"by":[58,156],"large":[60],"margin.":[61],"It":[62,99],"based":[64],"on":[65,161],"combination":[67],"of":[68,77,113,149,171,178,195],"several":[69],"ideas.":[71],"One":[72],"two-stage":[75],"interpretation":[76],"provided":[79],"instructions.":[80],"The":[81,186],"method":[82,144,180],"first":[83,129],"selects":[84],"and":[85,116,152],"interprets":[86],"instruction":[88],"without":[89],"visual":[91,106],"information,":[92],"yielding":[93,109],"tentative":[95],"action":[96,115],"sequence":[97],"prediction.":[98],"then":[100],"integrates":[101],"prediction":[103,112,170],"information":[107,155],"etc.,":[108],"final":[111],"object.":[118],"As":[119],"object's":[121],"class":[122],"to":[123,167,205],"interact":[124],"identified":[126],"stage,":[130],"it":[131],"can":[132],"accurately":[133],"select":[134],"correct":[136],"object":[137],"from":[138],"input":[140],"image.":[141],"Moreover,":[142],"our":[143],"considers":[145],"multiple":[146,208],"egocentric":[147],"views":[148],"extracts":[153],"essential":[154],"applying":[157],"hierarchical":[158],"attention":[159],"conditioned":[160],"current":[163,187],"instruction.":[164],"contributes":[166],"accurate":[169],"actions":[172],"navigation.":[174],"A":[175],"preliminary":[176],"version":[177,188],"won":[181],"ALFRED":[183],"Challenge":[184],"2020.":[185],"achieves":[189],"unseen":[191],"environment's":[192],"success":[193],"rate":[194],"4.45%":[196],"single":[199],"view,":[200],"further":[203],"improved":[204],"8.37%":[206],"views.":[209]},"counts_by_year":[{"year":2021,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
