{"id":"https://openalex.org/W7127317677","doi":"https://doi.org/10.48550/arxiv.2602.00551","title":"APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation","display_name":"APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7127317677","doi":"https://doi.org/10.48550/arxiv.2602.00551"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.00551","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124897386","display_name":"Daoxuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Daoxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124944881","display_name":"Ping Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114803721","display_name":"Xiaobo Xia","orcid":"https://orcid.org/0000-0003-3615-0919"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Xiaobo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124959531","display_name":"Xiu Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Xiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080207705","display_name":"Ruichen Zhen","orcid":"https://orcid.org/0000-0002-1212-6538"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhen, Ruichen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043249256","display_name":"Jianqiang Xiao","orcid":"https://orcid.org/0000-0002-5054-0318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jianqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124930361","display_name":"Shuo Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Shuo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5124897386"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8190000057220459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8190000057220459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.0340999998152256,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.024800000712275505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.5425000190734863},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5333999991416931},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.44449999928474426},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.43149998784065247},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.4235999882221222},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.41940000653266907},{"id":"https://openalex.org/keywords/obstacle-avoidance","display_name":"Obstacle avoidance","score":0.414900004863739},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.41110000014305115},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.39010000228881836},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.38269999623298645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7825000286102295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6036999821662903},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.5425000190734863},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5333999991416931},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.44449999928474426},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.43149998784065247},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.4235999882221222},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.41940000653266907},{"id":"https://openalex.org/C6683253","wikidata":"https://www.wikidata.org/wiki/Q7075535","display_name":"Obstacle avoidance","level":4,"score":0.414900004863739},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.41110000014305115},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3959999978542328},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.39010000228881836},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.38269999623298645},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38199999928474426},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3440999984741211},{"id":"https://openalex.org/C49020025","wikidata":"https://www.wikidata.org/wiki/Q1059099","display_name":"Chaining","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C2779110517","wikidata":"https://www.wikidata.org/wiki/Q1240788","display_name":"Supervisor","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C2779151265","wikidata":"https://www.wikidata.org/wiki/Q1156791","display_name":"Copying","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.29580000042915344},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2955999970436096},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C2987819851","wikidata":"https://www.wikidata.org/wiki/Q191839","display_name":"Aerial imagery","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2883000075817108},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C2780103172","wikidata":"https://www.wikidata.org/wiki/Q1309721","display_name":"Visual Objects","level":3,"score":0.28619998693466187},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C124527596","wikidata":"https://www.wikidata.org/wiki/Q17029359","display_name":"Hierarchical control system","level":3,"score":0.28139999508857727},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25859999656677246},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2574000060558319}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.00551","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.00551","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.00551","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.00551","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7319401502609253}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Aerial":[0,13],"Object":[1],"Goal":[2],"Navigation,":[3],"a":[4,23,68,87,102,137,165],"challenging":[5,203],"frontier":[6],"in":[7,44,79,182,223],"Embodied":[8],"AI,":[9],"requires":[10],"an":[11,118,149],"Unmanned":[12],"Vehicle":[14],"(UAV)":[15],"agent":[16,71],"to":[17,106,152],"autonomously":[18],"explore,":[19],"reason,":[20],"and":[21,30,48,52,55,76,113,139,155,168,177,199,210],"identify":[22],"specific":[24],"target":[25,77,157],"using":[26],"only":[27],"visual":[28],"perception":[29],"language":[31],"description.":[32],"However,":[33],"existing":[34],"methods":[35],"struggle":[36],"with":[37,127],"the":[38,98,173,179,190,194,211],"memorization":[39],"of":[40,101,193,213],"complex":[41,80],"spatial":[42,134],"representations":[43],"aerial":[45,81],"environments,":[46],"reliable":[47],"interpretable":[49,119],"action":[50],"decision-making,":[51],"inefficient":[53],"exploration":[54,75],"information":[56],"gathering.":[57],"To":[58],"address":[59],"these":[60,160],"challenges,":[61],"we":[62],"introduce":[63],"\\textbf{APEX}":[64],"(Aerial":[65],"Parallel":[66],"Explorer),":[67],"novel":[69],"hierarchical":[70,215],"designed":[72],"for":[73],"efficient":[74],"acquisition":[78],"settings.":[82],"APEX":[83,188],"is":[84,221],"built":[85],"upon":[86],"modular,":[88],"three-part":[89],"architecture:":[90],"1)":[91],"Dynamic":[92],"Spatio-Semantic":[93],"Mapping":[94],"Memory,":[95],"which":[96,130,147],"leverages":[97],"zero-shot":[99],"capability":[100],"Vision-Language":[103],"Model":[104],"(VLM)":[105],"dynamically":[107],"construct":[108],"high-resolution":[109],"3D":[110],"Attraction,":[111],"Exploration,":[112],"Obstacle":[114],"maps,":[115],"serving":[116],"as":[117],"memory":[120],"mechanism.":[121],"2)":[122],"Action":[123],"Decision":[124],"Module,":[125,146],"trained":[126],"reinforcement":[128],"learning,":[129],"translates":[131],"this":[132],"rich":[133],"understanding":[135],"into":[136,164],"fine-grained":[138],"robust":[140],"control":[141],"policy.":[142],"3)":[143],"Target":[144],"Grounding":[145],"employs":[148],"open-vocabulary":[150],"detector":[151],"achieve":[153],"definitive":[154],"generalizable":[156],"identification.":[158],"All":[159],"components":[161],"are":[162],"integrated":[163],"hierarchical,":[166],"asynchronous,":[167],"parallel":[169],"framework,":[170],"effectively":[171],"bypassing":[172],"VLM's":[174],"inference":[175],"latency":[176],"boosting":[178],"agent's":[180],"proactivity":[181],"exploration.":[183],"Extensive":[184],"experiments":[185],"show":[186],"that":[187],"outperforms":[189],"previous":[191],"state":[192],"art":[195],"by":[196],"+4.2\\%":[197],"SR":[198],"+2.8\\%":[200],"SPL":[201],"on":[202],"UAV-ON":[204],"benchmarks,":[205],"demonstrating":[206],"its":[207,214],"superior":[208],"efficiency":[209],"effectiveness":[212],"asynchronous":[216],"design.":[217],"Our":[218],"source":[219],"code":[220],"provided":[222],"\\href{https://github.com/4amGodvzx/apex}{GitHub}":[224]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-04T00:00:00"}
