{"id":"https://openalex.org/W4401057329","doi":"https://doi.org/10.1007/s11263-024-02159-8","title":"ESceme: Vision-and-Language Navigation with Episodic Scene Memory","display_name":"ESceme: Vision-and-Language Navigation with Episodic Scene Memory","publication_year":2024,"publication_date":"2024-07-26","ids":{"openalex":"https://openalex.org/W4401057329","doi":"https://doi.org/10.1007/s11263-024-02159-8"},"language":"en","primary_location":{"id":"doi:10.1007/s11263-024-02159-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02159-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02159-8.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02159-8.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101610027","display_name":"Qi Zheng","orcid":"https://orcid.org/0000-0001-6879-8256"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["AU","CN"],"is_corresponding":true,"raw_author_name":"Qi Zheng","raw_affiliation_strings":["College of Electronics and Information Engineering, Shenzhen University, 518000, Shenzhen, China","School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Electronics and Information Engineering, Shenzhen University, 518000, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101424059","display_name":"Daqing Liu","orcid":"https://orcid.org/0000-0002-8286-0105"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daqing Liu","raw_affiliation_strings":["JD Explore Academy, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD Explore Academy, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101701657","display_name":"Chaoyue Wang","orcid":"https://orcid.org/0000-0002-9002-1029"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoyue Wang","raw_affiliation_strings":["JD Explore Academy, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD Explore Academy, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345321","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0001-6595-7661"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia"],"raw_orcid":"https://orcid.org/0000-0001-6595-7661","affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053719139","display_name":"Dadong Wang","orcid":"https://orcid.org/0000-0003-0409-2259"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dadong Wang","raw_affiliation_strings":["DATA 61, CSIRO, Sydney, NSW, 2122, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DATA 61, CSIRO, Sydney, NSW, 2122, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108378610","display_name":"Dacheng Tao","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dacheng Tao","raw_affiliation_strings":["School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney, Sydney, NSW, 2008, Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101610027"],"corresponding_institution_ids":["https://openalex.org/I129604602","https://openalex.org/I180726961"],"apc_list":{"value":2890,"currency":"EUR","value_usd":3690},"apc_paid":{"value":2890,"currency":"EUR","value_usd":3690},"fwci":1.1219,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78761904,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"133","issue":"1","first_page":"254","last_page":"274"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9660999774932861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8244855403900146},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.7559086084365845},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.7103192806243896},{"id":"https://openalex.org/keywords/episodic-memory","display_name":"Episodic memory","score":0.5652841925621033},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5557907819747925},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.516426146030426},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.494554728269577},{"id":"https://openalex.org/keywords/horizon","display_name":"Horizon","score":0.46224072575569153},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.41681915521621704},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4146902561187744},{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.41280174255371094},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.39151886105537415},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12766322493553162},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.09571322798728943},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08857834339141846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8244855403900146},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.7559086084365845},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.7103192806243896},{"id":"https://openalex.org/C88576662","wikidata":"https://www.wikidata.org/wiki/Q18646","display_name":"Episodic memory","level":3,"score":0.5652841925621033},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5557907819747925},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.516426146030426},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.494554728269577},{"id":"https://openalex.org/C159176650","wikidata":"https://www.wikidata.org/wiki/Q43261","display_name":"Horizon","level":2,"score":0.46224072575569153},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.41681915521621704},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4146902561187744},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.41280174255371094},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39151886105537415},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12766322493553162},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.09571322798728943},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08857834339141846},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11263-024-02159-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02159-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02159-8.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11263-024-02159-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02159-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02159-8.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320966","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401057329.pdf"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2064675550","https://openalex.org/W2103104224","https://openalex.org/W2260756217","https://openalex.org/W2425121537","https://openalex.org/W2593841437","https://openalex.org/W2805984364","https://openalex.org/W2896457183","https://openalex.org/W2909303996","https://openalex.org/W2926977875","https://openalex.org/W2951973805","https://openalex.org/W2962744691","https://openalex.org/W2963800628","https://openalex.org/W2963846044","https://openalex.org/W2964339842","https://openalex.org/W2964935470","https://openalex.org/W2974759213","https://openalex.org/W2987914945","https://openalex.org/W3003391301","https://openalex.org/W3034376488","https://openalex.org/W3034500398","https://openalex.org/W3034578524","https://openalex.org/W3035232877","https://openalex.org/W3040922163","https://openalex.org/W3092825347","https://openalex.org/W3100923070","https://openalex.org/W3106571068","https://openalex.org/W3106641651","https://openalex.org/W3107069568","https://openalex.org/W3109097593","https://openalex.org/W3164900052","https://openalex.org/W3172675210","https://openalex.org/W3174887918","https://openalex.org/W3176974620","https://openalex.org/W3195026654","https://openalex.org/W3204763493","https://openalex.org/W3206064582","https://openalex.org/W3208883793","https://openalex.org/W4226052928","https://openalex.org/W4285306484","https://openalex.org/W4287215707","https://openalex.org/W4304097971","https://openalex.org/W4304099262","https://openalex.org/W4312238277","https://openalex.org/W4312253995","https://openalex.org/W4312295286","https://openalex.org/W4312434279","https://openalex.org/W4312544224","https://openalex.org/W4312618696","https://openalex.org/W4312746993","https://openalex.org/W4312938887","https://openalex.org/W4386065508","https://openalex.org/W4386065870","https://openalex.org/W4386075653","https://openalex.org/W4390871755","https://openalex.org/W4394627367","https://openalex.org/W6762453230","https://openalex.org/W6765527290","https://openalex.org/W6778299498"],"related_works":["https://openalex.org/W2098987383","https://openalex.org/W2417260800","https://openalex.org/W1596203174","https://openalex.org/W2117933979","https://openalex.org/W2283130723","https://openalex.org/W103938586","https://openalex.org/W2104718772","https://openalex.org/W4236658683","https://openalex.org/W2020463334","https://openalex.org/W2026182709"],"abstract_inverted_index":{"Abstract":[0],"Vision-and-language":[1],"navigation":[2,11,23],"(VLN)":[3],"simulates":[4],"a":[5,61,93,120],"visual":[6],"agent":[7,90,103],"that":[8,70],"follows":[9],"natural-language":[10],"instructions":[12],"in":[13,22,24],"real-world":[14],"scenes.":[15],"Existing":[16],"approaches":[17],"have":[18],"made":[19],"enormous":[20],"progress":[21],"new":[25],"environments,":[26],"such":[27],"as":[28],"beam":[29],"search,":[30],"pre-exploration,":[31],"and":[32,41,135,153],"dynamic":[33],"or":[34],"hierarchical":[35],"history":[36],"encoding.":[37],"To":[38],"balance":[39],"generalization":[40],"efficiency,":[42],"we":[43,59],"resort":[44],"to":[45,91,105,114],"memorizing":[46],"visited":[47],"scenarios":[48],"apart":[49],"from":[50],"the":[51,81,89,97,102,115,129,138,144,165],"ongoing":[52],"route":[53],"while":[54,140],"navigating.":[55,141],"In":[56],"this":[57],"work,":[58],"introduce":[60],"mechanism":[62],"of":[63,75,96,111,125,146],"Episodic":[64],"Scene":[65],"memory":[66,87,139],"(ESceme)":[67],"for":[68],"VLN":[69,156],"wakes":[71],"an":[72],"agent\u2019s":[73],"memories":[74],"past":[76],"visits":[77],"when":[78],"it":[79],"enters":[80],"current":[82,116],"scene.":[83],"The":[84],"episodic":[85],"scene":[86],"allows":[88],"envision":[92],"bigger":[94],"picture":[95],"next":[98],"prediction.":[99],"This":[100],"way,":[101],"learns":[104],"utilize":[106],"dynamically":[107],"updated":[108],"information":[109],"instead":[110],"merely":[112],"adapting":[113],"observations.":[117],"We":[118,142],"provide":[119],"simple":[121],"yet":[122],"effective":[123],"implementation":[124],"ESceme":[126,147,159],"by":[127],"enhancing":[128],"accessible":[130],"views":[131],"at":[132],"each":[133],"location":[134],"progressively":[136],"completing":[137],"verify":[143],"superiority":[145],"on":[148,164],"short-horizon":[149],"(R2R),":[150],"long-horizon":[151],"(R4R),":[152],"vision-and-dialog":[154],"(CVDN)":[155],"tasks.":[157],"Our":[158],"also":[160],"wins":[161],"first":[162],"place":[163],"CVDN":[166],"leaderboard.":[167],"Code":[168],"is":[169],"available:":[170],"https://github.com/qizhust/esceme":[171],".":[172]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
