{"id":"https://openalex.org/W7159668938","doi":"https://doi.org/10.48550/arxiv.2604.27620","title":"SpaAct: Spatially-Activated Transition Learning with Curriculum Adaptation for Vision-Language Navigation","display_name":"SpaAct: Spatially-Activated Transition Learning with Curriculum Adaptation for Vision-Language Navigation","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159668938","doi":"https://doi.org/10.48550/arxiv.2604.27620"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27620","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048567115","display_name":"Pengna Li","orcid":"https://orcid.org/0000-0002-8477-8340"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Pengna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034105691","display_name":"Kangyi Wu","orcid":"https://orcid.org/0000-0001-7382-4949"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Kangyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134941492","display_name":"Shaoqing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Shaoqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134929378","display_name":"Fang Li (41162)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Fang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134943018","display_name":"Hanbing Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hanbing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134955178","display_name":"Lin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110962115","display_name":"Kailin Lyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Kailin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134977443","display_name":"Long Chen (315739)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134951809","display_name":"Zhi-Xin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhi-Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134958279","display_name":"Nanning Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Nanning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.002199999988079071,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7342000007629395},{"id":"https://openalex.org/keywords/transition","display_name":"Transition (genetics)","score":0.6384000182151794},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.611299991607666},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.5899999737739563},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5616999864578247},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46309998631477356},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4449000060558319},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.3930000066757202},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.37779998779296875}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7342000007629395},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6506999731063843},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.6384000182151794},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.611299991607666},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5824999809265137},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5616999864578247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48069998621940613},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46309998631477356},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3930000066757202},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.3456999957561493},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3019999861717224},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6836071610450745,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-and-Language":[0],"Navigation":[1],"(VLN)":[2],"aims":[3],"to":[4,9,15,28,84,101,129,158,163,171,197],"enable":[5],"an":[6],"embodied":[7],"agent":[8],"follow":[10],"natural-language":[11],"instructions":[12],"and":[13,46,93,109,122,186,195],"navigate":[14],"a":[16,57,135,145],"target":[17],"location":[18],"in":[19,69,134],"unseen":[20],"3D":[21],"environments.":[22],"We":[23,190],"argue":[24],"that":[25,63,152,180],"adapting":[26],"VLMs":[27],"VLN":[29],"requires":[30],"endowing":[31],"them":[32],"with":[33],"two":[34,74,112],"complementary":[35],"capabilities":[36],"for":[37],"acquiring":[38],"such":[39],"awareness,":[40],"namely":[41],"backward":[42,119],"action":[43,88,120],"reasoning":[44,121],"(why)":[45],"forward":[47,123],"transition":[48,124],"prediction~(how).":[49],"Based":[50],"on":[51,107,117,175],"this":[52],"insight,":[53],"we":[54,142],"propose":[55],"SpaAct,":[56],"simple":[58],"yet":[59],"effective":[60],"training":[61,154],"framework":[62],"activates":[64],"the":[65,82,86,99,103,127,161,193],"dynamic":[66,131],"spatial":[67,75,132],"awareness":[68,133],"VLMs.":[70],"Specifically,":[71],"SpaAct":[72,181],"introduces":[73],"activation":[76],"tasks:":[77],"Action":[78],"Retrospection,":[79],"which":[80,97],"asks":[81],"model":[83,100,128,162],"infer":[85],"executed":[87],"sequence":[89],"from":[90,156,168],"visual":[91,104],"transitions,":[92],"Future":[94],"Frame":[95],"Selection,":[96],"forces":[98],"predict":[102],"transitions":[105],"conditioned":[106],"history":[108],"action.":[110],"These":[111],"objectives":[113],"provide":[114],"lightweight":[115],"supervision":[116],"both":[118],"prediction,":[125],"encouraging":[126],"build":[130],"VLM-friendly":[136],"way.":[137],"To":[138],"further":[139],"stabilize":[140],"adaptation,":[141],"design":[143],"TriPA,":[144],"Tri-factor":[146],"Progressive":[147],"Adaptive":[148],"curriculum":[149],"learning":[150],"method":[151],"organizes":[153],"samples":[155],"easy":[157],"hard,":[159],"allowing":[160],"gradually":[164],"acquire":[165],"navigation":[166,185],"skills":[167],"basic":[169],"locomotion":[170],"long-horizon":[172],"reasoning.":[173],"Experiments":[174],"standard":[176],"VLN-CE":[177],"benchmarks":[178],"show":[179],"consistently":[182],"improves":[183],"VLM-based":[184],"achieves":[187],"state-of-the-art":[188],"performance.":[189],"will":[191],"release":[192],"code":[194],"models":[196],"support":[198],"future":[199],"research.":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
