{"id":"https://openalex.org/W7155884586","doi":"https://doi.org/10.48550/arxiv.2604.22238","title":"CodeGraphVLP: Code-as-Planner Meets Semantic-Graph State for Non-Markovian Vision-Language-Action Models","display_name":"CodeGraphVLP: Code-as-Planner Meets Semantic-Graph State for Non-Markovian Vision-Language-Action Models","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7155884586","doi":"https://doi.org/10.48550/arxiv.2604.22238"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22238","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22238","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22238","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134659678","display_name":"Khoa Vo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Vo, Khoa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116696777","display_name":"Sieu Tran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tran, Sieu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108918698","display_name":"Taisei Hanyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hanyu, Taisei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121640803","display_name":"Yuki Ikebe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ikebe, Yuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134701685","display_name":"Duy Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Duy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134708572","display_name":"Bui Duy Quoc Nghi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nghi, Bui Duy Quoc","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052911807","display_name":"Minh Vu","orcid":"https://orcid.org/0009-0009-1441-8276"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Minh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077092430","display_name":"Anthony L. Gunderman","orcid":"https://orcid.org/0000-0002-8554-2970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gunderman, Anthony","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030170704","display_name":"Chase Rainwater","orcid":"https://orcid.org/0009-0008-6899-0932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rainwater, Chase","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134703017","display_name":"Anh Tu\u1ea5n Nguy\u1ec5n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Anh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134731671","display_name":"Ngan Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Ngan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5134659678"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8565000295639038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8565000295639038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.029600000008940697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.026399999856948853,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.6759999990463257},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.647599995136261},{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.6437000036239624},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5576000213623047},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4616999924182892},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.45320001244544983},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4413999915122986},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.42410001158714294},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4212999939918518}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7217000126838684},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.6759999990463257},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.647599995136261},{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.6437000036239624},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5576000213623047},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5194000005722046},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4616999924182892},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.45320001244544983},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4413999915122986},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4212999939918518},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3522000014781952},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3197000026702881},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30790001153945923},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.29910001158714294},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2797999978065491},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C95203288","wikidata":"https://www.wikidata.org/wiki/Q221682","display_name":"Semaphore","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C11821877","wikidata":"https://www.wikidata.org/wiki/Q187959","display_name":"Distributive property","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C132094186","wikidata":"https://www.wikidata.org/wiki/Q641585","display_name":"Clutter","level":3,"score":0.26429998874664307}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22238","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22238","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22238","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22238","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2],"promise":[3],"generalist":[4],"robot":[5],"manipulation,":[6],"but":[7],"are":[8],"typically":[9],"trained":[10],"and":[11,46,49,78,87,104,141],"deployed":[12],"as":[13],"short-horizon":[14],"policies":[15],"that":[16,62,121],"assume":[17],"the":[18,44,123,161],"latest":[19],"observation":[20],"is":[21],"sufficient":[22],"for":[23],"action":[24],"reasoning.":[25],"This":[26],"assumption":[27],"breaks":[28],"in":[29,43],"non-Markovian":[30,131],"long-horizon":[31,65],"tasks,":[32,132],"where":[33,47],"task-relevant":[34,85],"evidence":[35],"can":[36],"be":[37],"occluded":[38],"or":[39],"appear":[40],"only":[41],"earlier":[42],"trajectory,":[45],"clutter":[48],"distractors":[50],"make":[51],"fine-grained":[52],"visual":[53],"grounding":[54],"brittle.":[55],"We":[56,113,153],"present":[57],"CodeGraphVLP,":[58],"a":[59,69,106],"hierarchical":[60],"framework":[61],"enables":[63],"reliable":[64],"manipulation":[66],"by":[67],"combining":[68],"persistent":[70],"semantic-graph":[71,83,98],"state":[72],"with":[73,110],"an":[74],"executable":[75],"code-based":[76],"planner":[77,94],"progress-guided":[79],"visual-language":[80],"prompting.":[81],"The":[82,92],"maintains":[84],"entities":[86],"relations":[88],"under":[89],"partial":[90],"observability.":[91],"synthesized":[93],"executes":[95],"over":[96,137],"this":[97],"to":[99,117,150,159],"perform":[100],"efficient":[101],"progress":[102],"checks":[103],"outputs":[105,116],"subtask":[107],"instruction":[108],"together":[109],"subtask-relevant":[111],"objects.":[112],"use":[114],"these":[115],"construct":[118],"clutter-suppressed":[119],"observations":[120],"focus":[122],"VLA":[124,139],"executor":[125],"on":[126],"critical":[127],"evidence.":[128],"On":[129],"real-world":[130],"CodeGraphVLP":[133],"improves":[134],"task":[135],"completion":[136],"strong":[138],"baselines":[140],"history-enabled":[142],"variants":[143],"while":[144],"substantially":[145],"lowering":[146],"planning":[147],"latency":[148],"compared":[149],"VLM-in-the-loop":[151],"planning.":[152],"also":[154],"conduct":[155],"extensive":[156],"ablation":[157],"studies":[158],"confirm":[160],"contributions":[162],"of":[163],"each":[164],"component.":[165]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-28T00:00:00"}
