{"id":"https://openalex.org/W7155492613","doi":"https://doi.org/10.48550/arxiv.2604.21924","title":"Long-Horizon Manipulation via Trace-Conditioned VLA Planning","display_name":"Long-Horizon Manipulation via Trace-Conditioned VLA Planning","publication_year":2026,"publication_date":"2026-04-23","ids":{"openalex":"https://openalex.org/W7155492613","doi":"https://doi.org/10.48550/arxiv.2604.21924"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.21924","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21924","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.21924","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015322847","display_name":"Isabella Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Isabella","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089283494","display_name":"An\u2010Chieh Cheng","orcid":"https://orcid.org/0000-0002-2203-1911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, An-Chieh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134492612","display_name":"Rui Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134494517","display_name":"Geng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Geng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134515250","display_name":"Ri-Zhao Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Ri-Zhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134560372","display_name":"Xueyan Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Xueyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134560152","display_name":"Sha Yi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi, Sha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134493648","display_name":"Hongxu Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Hongxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134466985","display_name":"Xiaolong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaolong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134533739","display_name":"Sifei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Sifei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8138999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8138999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.0835999995470047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.03180000185966492,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.7771999835968018},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.6995999813079834},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.669700026512146},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5576000213623047},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.5497999787330627},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5217999815940857},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.47760000824928284},{"id":"https://openalex.org/keywords/continuation","display_name":"Continuation","score":0.43959999084472656}],"concepts":[{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.7771999835968018},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.6995999813079834},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6898000240325928},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.669700026512146},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5576000213623047},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.5497999787330627},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5217999815940857},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.47760000824928284},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4684000015258789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44110000133514404},{"id":"https://openalex.org/C88626702","wikidata":"https://www.wikidata.org/wiki/Q1128903","display_name":"Continuation","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3564999997615814},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C6683253","wikidata":"https://www.wikidata.org/wiki/Q7075535","display_name":"Obstacle avoidance","level":4,"score":0.3124000132083893},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30480000376701355},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.29899999499320984},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26499998569488525},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.21924","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21924","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.21924","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21924","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7837753295898438,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Long-horizon":[0],"manipulation":[1,171],"remains":[2],"challenging":[3],"for":[4],"vision-language-action":[5],"(VLA)":[6],"policies:":[7],"real":[8,177],"tasks":[9],"are":[10],"multi-step,":[11],"progress-dependent,":[12],"and":[13,46,80,96,143,150,169,174,187],"brittle":[14,157],"to":[15,30,94,98,106],"compounding":[16],"execution":[17,29],"errors.":[18],"We":[19],"present":[20],"LoHo-Manip,":[21],"a":[22,35,50,59,66,82,86,176],"modular":[23],"framework":[24],"that":[25,63],"scales":[26],"short-horizon":[27],"VLA":[28,103],"long-horizon":[31,114,165,184],"instruction":[32],"following":[33,121],"via":[34],"dedicated":[36],"task-management":[37],"VLM.":[38],"The":[39,101],"manager":[40],"is":[41,47,104],"decoupled":[42],"from":[43],"the":[44,54,109,122,126],"executor":[45,102],"invoked":[48],"in":[49,140,172,183],"receding-horizon":[51],"manner:":[52],"given":[53],"current":[55],"observation,":[56],"it":[57],"predicts":[58],"progress-aware":[60],"remaining":[61,74,127],"plan":[62,128],"combines":[64],"(i)":[65],"subtask":[67],"sequence":[68],"with":[69],"an":[70,133],"explicit":[71],"done":[72],"+":[73],"split":[75],"as":[76],"lightweight":[77],"language":[78],"memory,":[79],"(ii)":[81],"visual":[83],"trace":[84],"--":[85],"compact":[87],"2D":[88],"keypoint":[89],"trajectory":[90,167],"prompt":[91],"specifying":[92],"where":[93],"go":[95],"what":[97],"approach":[99],"next.":[100],"adapted":[105],"condition":[107],"on":[108,175],"rendered":[110],"trace,":[111],"thereby":[112],"turning":[113],"decision-making":[115],"into":[116],"repeated":[117],"local":[118],"control":[119],"by":[120],"trace.":[123],"Crucially,":[124],"predicting":[125],"at":[129],"each":[130],"step":[131],"yields":[132],"implicit":[134],"closed":[135],"loop:":[136],"failed":[137],"steps":[138],"persist":[139],"subsequent":[141],"outputs,":[142],"traces":[144],"update":[145],"accordingly,":[146],"enabling":[147],"automatic":[148],"continuation":[149],"replanning":[151],"without":[152],"hand-crafted":[153],"recovery":[154],"logic":[155],"or":[156],"visual-history":[158],"buffers.":[159],"Extensive":[160],"experiments":[161],"spanning":[162],"embodied":[163],"planning,":[164],"reasoning,":[166],"prediction,":[168],"end-to-end":[170],"simulation":[173],"Franka":[178],"robot":[179],"demonstrate":[180],"strong":[181],"gains":[182],"success,":[185],"robustness,":[186],"out-of-distribution":[188],"generalization.":[189],"Project":[190],"page:":[191],"https://www.liuisabella.com/LoHoManip":[192]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-25T00:00:00"}
