{"id":"https://openalex.org/W7147490114","doi":"https://doi.org/10.1109/cnml68938.2026.11453129","title":"Design and Implementation of an Intelligent Navigation System Fusing Visual and Language Features","display_name":"Design and Implementation of an Intelligent Navigation System Fusing Visual and Language Features","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7147490114","doi":"https://doi.org/10.1109/cnml68938.2026.11453129"},"language":null,"primary_location":{"id":"doi:10.1109/cnml68938.2026.11453129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11453129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132553574","display_name":"Yifan Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Yifan Qin","raw_affiliation_strings":["The University of Sydney,Sydney,Australia,2006"],"affiliations":[{"raw_affiliation_string":"The University of Sydney,Sydney,Australia,2006","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5132553574"],"corresponding_institution_ids":["https://openalex.org/I129604602"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.91860512,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"574","last_page":"579"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7486000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7486000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.05649999901652336,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.051899999380111694,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.5949000120162964},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5837000012397766},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.5547999739646912},{"id":"https://openalex.org/keywords/navigation-system","display_name":"Navigation system","score":0.5278000235557556},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.40950000286102295},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3968999981880188},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.3700999915599823},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.3693999946117401},{"id":"https://openalex.org/keywords/overlay","display_name":"Overlay","score":0.35199999809265137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7534999847412109},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.5949000120162964},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5837000012397766},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.5547999739646912},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.513700008392334},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.45910000801086426},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42559999227523804},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.40950000286102295},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3968999981880188},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C136085584","wikidata":"https://www.wikidata.org/wiki/Q910289","display_name":"Overlay","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.34049999713897705},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C26990112","wikidata":"https://www.wikidata.org/wiki/Q6887224","display_name":"Mobile robot navigation","level":5,"score":0.32749998569488525},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.30660000443458557},{"id":"https://openalex.org/C145804949","wikidata":"https://www.wikidata.org/wiki/Q478123","display_name":"Situation awareness","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C49777639","wikidata":"https://www.wikidata.org/wiki/Q5264354","display_name":"Design language","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26969999074935913},{"id":"https://openalex.org/C31352089","wikidata":"https://www.wikidata.org/wiki/Q3750474","display_name":"Systems design","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C17305859","wikidata":"https://www.wikidata.org/wiki/Q382944","display_name":"Soar","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cnml68938.2026.11453129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11453129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W3034500398","https://openalex.org/W3035232877","https://openalex.org/W3043971245","https://openalex.org/W3109097593","https://openalex.org/W3206064582","https://openalex.org/W4312938887","https://openalex.org/W4390871884","https://openalex.org/W4400258109","https://openalex.org/W4402703032","https://openalex.org/W4402944034","https://openalex.org/W4403878248","https://openalex.org/W4415795795"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,105],"design":[4],"and":[5,49,51,71,96,119,130,135],"implementation":[6],"of":[7],"an":[8],"intelligent":[9],"navigation":[10,23,117],"system":[11,29,106],"that":[12,34,45,101,125],"fuses":[13],"visual":[14,112],"perception":[15],"with":[16,41,83],"language":[17,43,109],"understanding":[18],"to":[19,58,78],"support":[20],"robust":[21],"goal-directed":[22],"under":[24],"natural,":[25],"human-authored":[26],"instructions.":[27],"The":[28],"couples":[30],"a":[31,42,60,75,97,120],"vision":[32],"encoder":[33,44],"extracts":[35],"scene":[36],"cues":[37],"from":[38],"egocentric":[39],"observations":[40],"parses":[46],"intent,":[47],"constraints,":[48],"landmarks,":[50],"aligns":[52],"both":[53],"streams":[54],"via":[55],"bidirectional":[56],"attention":[57],"form":[59],"grounded":[61],"state":[62],"representation.":[63],"A":[64],"semantic":[65],"topological":[66],"memory":[67],"stores":[68],"visited":[69],"viewpoints":[70],"uncertainty":[72],"estimates,":[73],"enabling":[74],"hierarchical":[76],"planner":[77],"combine":[79],"long-horizon":[80],"route":[81],"selection":[82],"short-horizon":[84],"control.":[85],"To":[86],"improve":[87],"real-world":[88],"readiness,":[89],"we":[90],"integrate":[91],"safety":[92],"monitoring,":[93],"failure":[94],"detection,":[95],"lightweight":[98],"\"ask-for-help\"":[99],"mechanism":[100],"requests":[102],"clarification":[103],"when":[104],"detects":[107],"ambiguous":[108],"or":[110],"inconsistent":[111],"evidence.":[113],"Experiments":[114],"on":[115],"indoor":[116],"benchmarks":[118],"small-scale":[121],"real-robot":[122],"prototype":[123],"show":[124],"multimodal":[126],"fusion":[127],"improves":[128],"success":[129],"path":[131],"efficiency":[132],"over":[133],"vision-only":[134],"language-only":[136],"baselines":[137],"while":[138],"keeping":[139],"latency":[140],"within":[141],"interactive":[142],"limits.":[143]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
