{"id":"https://openalex.org/W4415707796","doi":"https://doi.org/10.1109/icme59968.2025.11209085","title":"Language-Conditioned Waypoint Predictor for Continuous Vision-and-Language Navigation","display_name":"Language-Conditioned Waypoint Predictor for Continuous Vision-and-Language Navigation","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707796","doi":"https://doi.org/10.1109/icme59968.2025.11209085"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209085","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074740407","display_name":"Zeyu Wang","orcid":"https://orcid.org/0009-0007-5875-5544"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zeyu Wang","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070842891","display_name":"Yuankai Qi","orcid":"https://orcid.org/0000-0003-4312-5682"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yuankai Qi","raw_affiliation_strings":["Macquarie University"],"affiliations":[{"raw_affiliation_string":"Macquarie University","institution_ids":["https://openalex.org/I99043593"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101399813","display_name":"Dong An","orcid":"https://orcid.org/0000-0002-1347-8535"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Dong An","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence (MBZUAI)"],"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence (MBZUAI)","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118900014","display_name":"Xu Yang","orcid":"https://orcid.org/0009-0004-5180-8648"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Yang","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100708466","display_name":"Hongxin Li","orcid":"https://orcid.org/0000-0002-2356-1607"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongxin Li","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101589393","display_name":"Zhaoxiang Zhang","orcid":"https://orcid.org/0000-0002-1469-1469"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxiang Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS),School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210100255"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074740407"],"corresponding_institution_ids":["https://openalex.org/I4210100255"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33630389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.004399999976158142,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/waypoint","display_name":"Waypoint","score":0.9947999715805054},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5015000104904175},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.35339999198913574},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.3499000072479248},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.34060001373291016},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.29919999837875366}],"concepts":[{"id":"https://openalex.org/C2781271823","wikidata":"https://www.wikidata.org/wiki/Q138081","display_name":"Waypoint","level":2,"score":0.9947999715805054},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.626800000667572},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.44679999351501465},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.35359999537467957},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3499000072479248},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2888000011444092},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2793000042438507},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2630000114440918},{"id":"https://openalex.org/C43472768","wikidata":"https://www.wikidata.org/wiki/Q7855620","display_name":"Turn-by-turn navigation","level":5,"score":0.2619999945163727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209085","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2963800628","https://openalex.org/W2964935470","https://openalex.org/W2970340522","https://openalex.org/W2979727876","https://openalex.org/W3009928773","https://openalex.org/W3034500398","https://openalex.org/W3034578524","https://openalex.org/W3100923070","https://openalex.org/W3109085430","https://openalex.org/W3165915253","https://openalex.org/W3172675210","https://openalex.org/W3176974620","https://openalex.org/W3192009892","https://openalex.org/W3203338521","https://openalex.org/W3206064780","https://openalex.org/W4312253995","https://openalex.org/W4312434279","https://openalex.org/W4312544224","https://openalex.org/W4312581490","https://openalex.org/W4312729859","https://openalex.org/W4312996039","https://openalex.org/W4385245566","https://openalex.org/W4394627367"],"related_works":[],"abstract_inverted_index":{"Waypoint":[0],"prediction":[1],"is":[2,110],"a":[3,69,95,149],"popular":[4],"technique":[5],"for":[6,57,101],"Vision-and-Language":[7],"Navigation":[8],"in":[9,61],"Continuous":[10],"Environments":[11],"(VLN-CE),":[12],"which":[13],"abstracts":[14],"navigable":[15],"locations":[16,59],"as":[17,82],"waypoints":[18,56],"to":[19,50,53,72,98,147,153],"ease":[20],"the":[21,44,51,62,74,77,83,89,102,106,113,117,123,129,134,137,142,156,159,164,168,189],"subsequent":[22],"action":[23],"prediction.":[24],"Nevertheless,":[25],"we":[26,67,93,121],"found":[27],"current":[28],"waypoint":[29,79,103,108,131,160,169,179],"predictors":[30],"are":[31],"not":[32],"always":[33],"accurate,":[34],"limiting":[35],"navigation\u2019s":[36],"overall":[37],"performance.":[38],"One":[39],"possible":[40],"reason":[41],"may":[42],"be":[43],"lack":[45],"of":[46,76,166],"language":[47,100,138],"context,":[48],"leading":[49],"failure":[52],"generate":[54],"corresponding":[55],"critical":[58],"mentioned":[60],"instructions.":[63],"To":[64],"that":[65,177],"end,":[66],"propose":[68],"novel":[70],"framework":[71],"enable":[73],"training":[75,151],"language-conditioned":[78,107],"predictor.":[80,104,132],"First,":[81],"VLN-CE":[84,125],"agents":[85],"ground":[86],"instructions":[87],"with":[88,112,128],"environment":[90],"when":[91],"navigating,":[92],"employ":[94],"pre-trained":[96],"agent":[97,127,140,144,157],"encode":[99],"Second,":[105],"predictor":[109,170],"trained":[111],"data":[114],"collected":[115],"using":[116],"same":[118],"agent.":[119,173],"Third,":[120],"train":[122,155],"new":[124],"navigation":[126,143,172],"proposed":[130],"Fourth,":[133],"disparity":[135],"between":[136,191],"encoder":[139],"and":[141,158,171,194],"drives":[145],"us":[146],"devise":[148],"cycle":[150],"scheme":[152],"alternately":[154],"predictor,":[161],"further":[162],"enhancing":[163],"performance":[165,181],"both":[167],"Experimental":[174],"results":[175],"show":[176],"our":[178],"predictor\u2019s":[180],"surpasses":[182],"all":[183],"existing":[184],"ones.":[185],"With":[186],"better":[187],"waypoints,":[188],"gap":[190],"waypoint-based":[192],"methods":[193],"their":[195],"upper":[196],"bound":[197],"narrows":[198],"by":[199],"about":[200],"60%.":[201]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
