{"id":"https://openalex.org/W7138098690","doi":"https://doi.org/10.1609/aaai.v40i22.38888","title":"VPN: Visual Prompt Navigation","display_name":"VPN: Visual Prompt Navigation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138098690","doi":"https://doi.org/10.1609/aaai.v40i22.38888"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i22.38888","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38888","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38888/42850","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38888/42850","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129752002","display_name":"Shuo Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuo Feng","raw_affiliation_strings":["College of Artificial Intelligence, Nanjing University of Aeronautics and Astronautics\nThe Key Laboratory of Brain-Machine Intelligence Technology, Ministry of Education"],"affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Nanjing University of Aeronautics and Astronautics\nThe Key Laboratory of Brain-Machine Intelligence Technology, Ministry of Education","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129691987","display_name":"Zihan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zihan Wang","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129667901","display_name":"Yuchen Li","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Li","raw_affiliation_strings":["Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129714912","display_name":"Rui Kong","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Kong","raw_affiliation_strings":["Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129686557","display_name":"Hengyi Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengyi Cai","raw_affiliation_strings":["Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129689065","display_name":"Shuaiqiang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaiqiang Wang","raw_affiliation_strings":["Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704312","display_name":"Gim Hee Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Gim Hee Lee","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129748513","display_name":"Piji Li","orcid":null},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Piji Li","raw_affiliation_strings":["College of Artificial Intelligence, Nanjing University of Aeronautics and Astronautics\nThe Key Laboratory of Brain-Machine Intelligence Technology, Ministry of Education"],"affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Nanjing University of Aeronautics and Astronautics\nThe Key Laboratory of Brain-Machine Intelligence Technology, Ministry of Education","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129749922","display_name":"Shuqiang Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuqiang Jiang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5129752002"],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41044776,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"22","first_page":"18253","last_page":"18261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.0035000001080334187,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7071999907493591},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5224000215530396},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.46970000863075256},{"id":"https://openalex.org/keywords/visual-language","display_name":"Visual language","score":0.45419999957084656},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4512999951839447},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.429500013589859}],"concepts":[{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7071999907493591},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6919999718666077},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5787000060081482},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5224000215530396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5090000033378601},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.46970000863075256},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.45419999957084656},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.450300008058548},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.429500013589859},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i22.38888","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38888","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38888/42850","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i22.38888","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38888","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38888/42850","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138098690.pdf","grobid_xml":"https://content.openalex.org/works/W7138098690.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"natural":[1],"language":[2,16,80],"is":[3,83],"commonly":[4],"used":[5],"to":[6,42,130,158],"guide":[7],"embodied":[8],"agents,":[9],"the":[10,19,60,132,177],"inherent":[11],"ambiguity":[12],"and":[13,73,89,100,109,115,145,148,172],"verbosity":[14],"of":[15,21,68,179],"often":[17],"hinder":[18],"effectiveness":[20],"language-guided":[22],"navigation":[23,62,102,160],"in":[24,97],"complex":[25],"environments.":[26],"To":[27],"this":[28],"end,":[29],"we":[30,123],"propose":[31],"Visual":[32],"Prompt":[33],"Navigation":[34],"(VPN),":[35],"a":[36,65,69,126],"novel":[37],"paradigm":[38],"that":[39],"guides":[40],"agents":[41],"navigate":[43],"using":[44],"only":[45],"user-provided":[46],"visual":[47,54,61,120,166,180],"prompts":[48],"within":[49],"2D":[50],"top-view":[51,169],"maps.":[52],"This":[53],"prompt":[55,146,167,181],"primarily":[56],"focuses":[57],"on":[58,64,79],"marking":[59],"trajectory":[63],"top-down":[66],"view":[67],"scene,":[70],"offering":[71],"intuitive":[72],"spatially":[74],"grounded":[75],"guidance":[76],"without":[77],"relying":[78],"instructions.":[81],"It":[82],"more":[84],"friendly":[85],"for":[86],"non-expert":[87],"users":[88],"reduces":[90],"interpretive":[91],"ambiguity.":[92],"We":[93],"build":[94],"VPN":[95,133],"tasks":[96],"both":[98],"discrete":[99],"continuous":[101],"settings,":[103],"constructing":[104],"two":[105,136],"new":[106],"datasets,":[107],"R2R-VP":[108],"R2R-CE-VP,":[110],"by":[111],"extending":[112],"existing":[113],"R2R":[114],"R2R-CE":[116],"episodes":[117],"with":[118,135],"corresponding":[119],"prompts.":[121],"Furthermore,":[122],"introduce":[124],"VPNet,":[125],"dedicated":[127],"baseline":[128],"network":[129],"handle":[131],"tasks,":[134],"data":[137,173],"augmentation":[138,141,150,174],"strategies:":[139],"view-level":[140],"(altering":[142],"initial":[143],"headings":[144],"orientations)":[147],"trajectory-level":[149],"(incorporating":[151],"diverse":[152],"trajectories":[153],"from":[154],"large-scale":[155],"3D":[156],"scenes),":[157],"enhance":[159],"performance.":[161],"Extensive":[162],"experiments":[163],"evaluate":[164],"how":[165],"forms,":[168],"map":[170],"formats,":[171],"strategies":[175],"affect":[176],"performance":[178],"navigation.":[182]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
