{"id":"https://openalex.org/W7161728567","doi":"https://doi.org/10.48550/arxiv.2605.17249","title":"SEDualVLN: A Spatially-Enhanced Dual-System for Vision-Language Navigation","display_name":"SEDualVLN: A Spatially-Enhanced Dual-System for Vision-Language Navigation","publication_year":2026,"publication_date":"2026-05-17","ids":{"openalex":"https://openalex.org/W7161728567","doi":"https://doi.org/10.48550/arxiv.2605.17249"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17249","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130045839","display_name":"Jingzhi Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jingzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136465430","display_name":"Junkai Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Junkai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136455960","display_name":"Wenxuan Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Wenxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086136558","display_name":"Haoyang Yang","orcid":"https://orcid.org/0000-0002-1646-0445"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Haoyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136476992","display_name":"Hailong Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Hailong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136477147","display_name":"Haoang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136456336","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0002-8700-7621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.002199999988079071,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/navigational-aid","display_name":"Navigational aid","score":0.5667999982833862},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5582000017166138},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5507000088691711},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5123999714851379},{"id":"https://openalex.org/keywords/navigation-system","display_name":"Navigation system","score":0.460099995136261},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.43849998712539673},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.43149998784065247},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.40939998626708984}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.682200014591217},{"id":"https://openalex.org/C2778761605","wikidata":"https://www.wikidata.org/wiki/Q3565782","display_name":"Navigational aid","level":2,"score":0.5667999982833862},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5582000017166138},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5507000088691711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5164999961853027},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5123999714851379},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.460099995136261},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.43849998712539673},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.43149998784065247},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4302999973297119},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.40939998626708984},{"id":"https://openalex.org/C43472768","wikidata":"https://www.wikidata.org/wiki/Q7855620","display_name":"Turn-by-turn navigation","level":5,"score":0.4041000008583069},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.38749998807907104},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.36890000104904175},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C26990112","wikidata":"https://www.wikidata.org/wiki/Q6887224","display_name":"Mobile robot navigation","level":5,"score":0.3264999985694885},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3221000134944916},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2985000014305115},{"id":"https://openalex.org/C161840515","wikidata":"https://www.wikidata.org/wiki/Q186131","display_name":"Terrain","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.266400009393692}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0,12],"Navigation":[1],"(VLN)":[2],"approaches":[3],"have":[4],"currently":[5],"followed":[6],"two":[7],"primary":[8],"paradigms:":[9],"the":[10,25,112,121,141,154,174],"end-to-end":[11,43],"Model":[13,34],"(VLM)":[14],"policy":[15],"fine-tuned":[16],"on":[17,166],"navigation":[18,48,155],"trajectories":[19],"to":[20,39,139,152],"directly":[21],"predict":[22],"actions,":[23],"and":[24,49,65,93,169,179],"zero-shot":[26,54],"modular":[27],"pipeline":[28],"integrating":[29],"pre-trained":[30],"Multimodal":[31],"Large":[32],"Language":[33],"(MLLM)":[35],"for":[36,62,98],"training-free":[37],"generalization":[38],"unseen":[40],"environments.":[41],"However,":[42],"methods":[44,55],"struggle":[45],"with":[46,90,107],"long-horizon":[47],"lack":[50],"dynamic":[51],"reasoning,":[52],"whereas":[53],"are":[56],"constrained":[57],"by":[58,116],"limited":[59],"spatial":[60,95,137],"grounding":[61],"reliable":[63],"planning":[64],"also":[66],"require":[67],"substantial":[68],"reasoning":[69],"time.":[70],"To":[71],"bridge":[72],"this":[73],"gap,":[74],"we":[75],"introduce":[76],"SEDualVLN,":[77],"a":[78,86,104,108,158],"spatially-enhanced":[79],"dual-system":[80],"VLN":[81,147],"framework.":[82],"System":[83,101],"1":[84],"is":[85],"VLM":[87],"model":[88],"enhanced":[89],"both":[91],"global":[92],"local":[94],"awareness,":[96],"used":[97],"action":[99],"generation.":[100],"2":[102],"integrates":[103],"general":[105],"MLLM":[106,113],"mapping":[109],"module,":[110],"wherein":[111],"plans":[114],"waypoints":[115],"leveraging":[117],"top-down":[118],"views":[119],"of":[120,127,136,144,176],"real-time":[122],"3D":[123],"map":[124],"alongside":[125],"streams":[126],"rendered":[128],"path":[129],"images.":[130],"Both":[131],"systems":[132],"leverage":[133],"different":[134],"forms":[135],"enhancement":[138],"cultivate":[140],"agent's":[142],"sense":[143],"direction":[145],"in":[146],"tasks.":[148],"Ultimately,":[149],"they":[150],"cooperate":[151],"complete":[153],"task":[156],"through":[157],"fast-slow":[159],"coordinated":[160],"approach.":[161],"SEDualVLN":[162],"achieves":[163],"state-of-the-art":[164],"performance":[165],"VLN-CE":[167],"benchmarks,":[168],"further":[170],"ablation":[171],"studies":[172],"demonstrate":[173],"effectiveness":[175],"each":[177],"system":[178],"module.":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
