{"id":"https://openalex.org/W7139060579","doi":"https://doi.org/10.48550/arxiv.2603.17351","title":"OmniVLN: Omnidirectional 3D Perception and Token-Efficient LLM Reasoning for Visual-Language Navigation across Air and Ground Platforms","display_name":"OmniVLN: Omnidirectional 3D Perception and Token-Efficient LLM Reasoning for Visual-Language Navigation across Air and Ground Platforms","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7139060579","doi":"https://doi.org/10.48550/arxiv.2603.17351"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17351","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130195108","display_name":"Zhongyuang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Zhongyuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130007559","display_name":"Min He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009649667","display_name":"Shaonan Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Shaonan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121945346","display_name":"Xinhang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xinhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129826972","display_name":"Muqing Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Muqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129967141","display_name":"Jianping Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jianping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129938439","display_name":"Jianfei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jianfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130172467","display_name":"Lihua Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Lihua","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5130195108"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7074000239372253,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7074000239372253,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.1907999962568283,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.016599999740719795,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.508400022983551},{"id":"https://openalex.org/keywords/omnidirectional-antenna","display_name":"Omnidirectional antenna","score":0.48339998722076416},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.48260000348091125},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.4659999907016754},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.429500013589859},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42800000309944153},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.41510000824928284},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.4050999879837036},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4025000035762787},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3968999981880188}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7299000024795532},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7210000157356262},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6040999889373779},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.508400022983551},{"id":"https://openalex.org/C24027999","wikidata":"https://www.wikidata.org/wiki/Q2176348","display_name":"Omnidirectional antenna","level":3,"score":0.48339998722076416},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48260000348091125},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.429500013589859},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42800000309944153},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.41510000824928284},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4025000035762787},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3968999981880188},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.3885999917984009},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C2777953668","wikidata":"https://www.wikidata.org/wiki/Q684116","display_name":"Omnidirectional camera","level":4,"score":0.36000001430511475},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.35350000858306885},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C146159030","wikidata":"https://www.wikidata.org/wiki/Q7625099","display_name":"Structure from motion","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C2775935494","wikidata":"https://www.wikidata.org/wiki/Q741964","display_name":"Search and rescue","level":3,"score":0.3149999976158142},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C58581272","wikidata":"https://www.wikidata.org/wiki/Q12741163","display_name":"Workspace","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C26990112","wikidata":"https://www.wikidata.org/wiki/Q6887224","display_name":"Mobile robot navigation","level":5,"score":0.2809999883174896},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2597000002861023},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C145804949","wikidata":"https://www.wikidata.org/wiki/Q478123","display_name":"Situation awareness","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C2778597888","wikidata":"https://www.wikidata.org/wiki/Q172169","display_name":"3D city models","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17351","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17351","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Language-guided":[0],"embodied":[1],"navigation":[2,80,174,214],"requires":[3],"an":[4,147,230],"agent":[5],"to":[6,121,160,198,206,218,234],"interpret":[7],"object-referential":[8],"instructions,":[9],"search":[10],"across":[11],"multiple":[12],"rooms,":[13,164],"localize":[14,168],"the":[15,71,141,158,188,227],"referenced":[16],"target,":[17],"and":[18,52,94,102,123,126,134,171,181,212,229],"execute":[19],"reliable":[20],"motion":[21],"toward":[22],"it.":[23],"Existing":[24],"systems":[25],"remain":[26],"limited":[27],"in":[28,208],"real":[29],"indoor":[30],"environments":[31],"because":[32],"narrow":[33],"field-of-view":[34],"sensing":[35],"exposes":[36],"only":[37],"a":[38,77,99,106,112,221],"partial":[39],"local":[40,179],"scene":[41],"at":[42],"each":[43],"step,":[44],"often":[45],"forcing":[46],"repeated":[47],"rotations,":[48],"delaying":[49],"target":[50,169],"discovery,":[51],"producing":[53],"fragmented":[54],"spatial":[55,154,193],"understanding;":[56],"meanwhile,":[57],"directly":[58],"prompting":[59],"LLMs":[60],"with":[61,87,152],"dense":[62],"3D":[63,85,149],"maps":[64],"or":[65],"exhaustive":[66],"object":[67],"lists":[68],"quickly":[69],"exceeds":[70],"context":[72],"budget.":[73],"We":[74,224],"present":[75],"OmniVLN,":[76],"zero-shot":[78],"visual-language":[79],"framework":[81],"that":[82,187],"couples":[83],"omnidirectional":[84,231],"perception":[86],"token-efficient":[88],"hierarchical":[89,190],"reasoning":[90],"for":[91],"both":[92],"aerial":[93],"ground":[95],"robots.":[96],"OmniVLN":[97],"fuses":[98],"rotating":[100],"LiDAR":[101],"panoramic":[103],"vision":[104],"into":[105,146],"hardware-agnostic":[107],"mapping":[108],"stack,":[109],"incrementally":[110],"constructs":[111],"five-layer":[113],"Dynamic":[114],"Scene":[115],"Graph":[116],"(DSG)":[117],"from":[118,196],"mesh":[119],"geometry":[120],"room-":[122],"building-level":[124],"structure,":[125],"stabilizes":[127],"high-level":[128],"topology":[129],"through":[130],"persistent-homology-based":[131],"room":[132],"partitioning":[133],"hybrid":[135],"geometric/VLM":[136],"relation":[137],"verification.":[138],"For":[139],"navigation,":[140],"global":[142],"DSG":[143],"is":[144],"transformed":[145],"agent-centric":[148],"octant":[150],"representation":[151],"multi-resolution":[153],"attention":[155],"prompting,":[156],"enabling":[157],"LLM":[159],"progressively":[161],"filter":[162],"candidate":[163],"infer":[165],"egocentric":[166],"orientation,":[167],"objects,":[170],"emit":[172],"executable":[173],"primitives":[175],"while":[176],"preserving":[177],"fine":[178],"detail":[180],"compact":[182],"long-range":[183],"memory.":[184],"Experiments":[185],"show":[186],"proposed":[189],"interface":[191],"improves":[192,213],"referring":[194],"accuracy":[195],"77.27\\%":[197],"93.18\\%,":[199],"reduces":[200],"cumulative":[201],"prompt":[202],"tokens":[203],"by":[204,216],"up":[205,217],"61.7\\%":[207],"cluttered":[209],"multi-room":[210],"settings,":[211],"success":[215],"11.68\\%":[219],"over":[220],"flat-list":[222],"baseline.":[223],"will":[225],"release":[226],"code":[228],"multimodal":[232],"dataset":[233],"support":[235],"reproducible":[236],"research.":[237]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
