{"id":"https://openalex.org/W7134925622","doi":"https://doi.org/10.48550/arxiv.2603.09513","title":"Beyond Short-Horizon: VQ-Memory for Robust Long-Horizon Manipulation in Non-Markovian Simulation Benchmarks","display_name":"Beyond Short-Horizon: VQ-Memory for Robust Long-Horizon Manipulation in Non-Markovian Simulation Benchmarks","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134925622","doi":"https://doi.org/10.48550/arxiv.2603.09513"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09513","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09513","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128783574","display_name":"Wang Honghui","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Honghui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128707617","display_name":"Jing Zhi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Zhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107291655","display_name":"Jicong Ao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ao, Jicong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128728181","display_name":"Song Shiji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Shiji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128769081","display_name":"Li Xuelong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xuelong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128692742","display_name":"Huang Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Gao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128760072","display_name":"Bai Chenjia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Chenjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5128783574"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2053000032901764,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2053000032901764,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.15940000116825104,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.10599999874830246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6269000172615051},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5483999848365784},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5178999900817871},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5005999803543091},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4959000051021576},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4941999912261963},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.44190001487731934},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4163999855518341},{"id":"https://openalex.org/keywords/external-data-representation","display_name":"External Data Representation","score":0.40779998898506165}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7281000018119812},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6269000172615051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.555899977684021},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5483999848365784},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5178999900817871},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5005999803543091},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4959000051021576},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4941999912261963},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.44190001487731934},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4163999855518341},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4007999897003174},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.3474999964237213},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C25016198","wikidata":"https://www.wikidata.org/wiki/Q781833","display_name":"Temporal logic","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2517000138759613},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09513","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09513","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"high":[1],"cost":[2],"of":[3,38,44],"collecting":[4],"real-robot":[5],"data":[6,18],"has":[7],"made":[8],"robotic":[9],"simulation":[10,65],"a":[11,55,62,110],"scalable":[12,63],"platform":[13],"for":[14],"both":[15],"evaluation":[16],"and":[17,41,80,88,96,103,112,161,176],"generation.":[19],"Yet":[20],"most":[21],"existing":[22,151],"benchmarks":[23],"concentrate":[24],"on":[25,157],"simple":[26],"manipulation":[27,58,89,180],"tasks":[28,40,98],"such":[29,74],"as":[30,75],"pick-and-place,":[31],"failing":[32],"to":[33,122,173],"capture":[34],"the":[35,42],"non-Markovian":[36,95],"characteristics":[37],"real-world":[39],"complexity":[43],"articulated":[45,57],"object":[46],"interactions.":[47],"To":[48],"address":[49],"this":[50],"limitation,":[51],"we":[52],"present":[53],"RuleSafe,":[54],"new":[56],"benchmark":[59],"built":[60],"upon":[61],"LLM-aided":[64],"framework.":[66],"RuleSafe":[67],"features":[68],"safes":[69],"with":[70,150,181],"diverse":[71],"unlocking":[72],"mechanisms,":[73],"key":[76],"locks,":[77,79,82],"password":[78],"logic":[81],"which":[83],"require":[84,100],"different":[85],"multi-stage":[86],"reasoning":[87],"strategies.":[90],"These":[91],"LLM-generated":[92],"rules":[93],"produce":[94],"long-horizon":[97,169],"that":[99,116,147,165],"temporal":[101,114,145],"modeling":[102],"memory-based":[104],"reasoning.":[105],"We":[106],"further":[107],"propose":[108],"VQ-Memory,":[109],"compact":[111],"structured":[113],"representation":[115,132],"uses":[117],"vector-quantized":[118],"variational":[119],"autoencoders":[120],"(VQ-VAEs)":[121],"encode":[123],"past":[124],"proprioceptive":[125],"states":[126],"into":[127],"discrete":[128],"latent":[129],"tokens.":[130],"This":[131],"filters":[133],"low-level":[134],"noise":[135],"while":[136],"preserving":[137],"high-level":[138],"task-phase":[139],"context,":[140],"providing":[141],"lightweight":[142],"yet":[143],"robust":[144],"cues":[146],"are":[148],"compatible":[149],"Vision-Language-Action":[152],"models":[153,160],"(VLA).":[154],"Extensive":[155],"experiments":[156],"state-of-the-art":[158],"VLA":[159],"diffusion":[162],"policies":[163],"show":[164],"VQ-Memory":[166],"consistently":[167],"improves":[168],"planning,":[170],"enhances":[171],"generalization":[172],"unseen":[174],"configurations,":[175],"enables":[177],"more":[178],"efficient":[179],"reduced":[182],"computational":[183],"cost.":[184],"Project":[185],"page:":[186],"vqmemory.github.io":[187]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-12T00:00:00"}
