{"id":"https://openalex.org/W7130427001","doi":"https://doi.org/10.48550/arxiv.2602.15549","title":"VLM-DEWM: Dynamic External World Model for Verifiable and Resilient Vision-Language Planning in Manufacturing","display_name":"VLM-DEWM: Dynamic External World Model for Verifiable and Resilient Vision-Language Planning in Manufacturing","publication_year":2026,"publication_date":"2026-02-17","ids":{"openalex":"https://openalex.org/W7130427001","doi":"https://doi.org/10.48550/arxiv.2602.15549"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.15549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.15549","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046053505","display_name":"Guoqin Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tang, Guoqin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125969661","display_name":"Qingxuan Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Qingxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126307065","display_name":"Gang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Gang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126288993","display_name":"Tong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125993570","display_name":"Zeyuan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zeyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125983655","display_name":"Zihang Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Zihang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5082054193","display_name":"Ning Ji","orcid":"https://orcid.org/0000-0001-7702-1537"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Ning","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5046053505"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.16859999299049377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.16859999299049377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.11400000005960464,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.10679999738931656,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.640500009059906},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.632099986076355},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.47209998965263367},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4072999954223633},{"id":"https://openalex.org/keywords/stateless-protocol","display_name":"Stateless protocol","score":0.38929998874664307},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3587000072002411},{"id":"https://openalex.org/keywords/doors","display_name":"Doors","score":0.3422999978065491}],"concepts":[{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.640500009059906},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.632099986076355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5629000067710876},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.47209998965263367},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4072999954223633},{"id":"https://openalex.org/C103613024","wikidata":"https://www.wikidata.org/wiki/Q230924","display_name":"Stateless protocol","level":3,"score":0.38929998874664307},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.38119998574256897},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C125209513","wikidata":"https://www.wikidata.org/wiki/Q4037520","display_name":"Doors","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C2780695315","wikidata":"https://www.wikidata.org/wiki/Q3799040","display_name":"Unobservable","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.28450000286102295},{"id":"https://openalex.org/C89992363","wikidata":"https://www.wikidata.org/wiki/Q5961558","display_name":"Track (disk drive)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2782999873161316},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.2644999921321869},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25940001010894775},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.15549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.15549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4103724956512451,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"model":[1],"(VLM)":[2],"shows":[3],"promise":[4],"for":[5,170],"high-level":[6],"planning":[7],"in":[8,14,174],"smart":[9],"manufacturing,":[10],"yet":[11],"their":[12],"deployment":[13],"dynamic":[15,175],"workcells":[16],"faces":[17],"two":[18],"critical":[19],"challenges:":[20],"(1)":[21],"stateless":[22],"operation,":[23],"they":[24],"cannot":[25],"persistently":[26],"track":[27],"out-of-view":[28],"states,":[29],"causing":[30],"world-state":[31,59],"drift;":[32],"and":[33,86,103,122,152,167],"(2)":[34],"opaque":[35],"reasoning,":[36],"failures":[37,97],"are":[38],"difficult":[39],"to":[40,43,129,141,150],"diagnose,":[41],"leading":[42],"costly":[44],"blind":[45],"retries.":[46],"This":[47],"paper":[48],"presents":[49],"VLM-DEWM,":[50],"a":[51,62,165],"cognitive":[52],"architecture":[53],"that":[54],"decouples":[55],"VLM":[56,71,132,134],"reasoning":[57],"from":[58,139,147],"management":[60],"through":[61,157],"persistent,":[63],"queryable":[64],"Dynamic":[65],"External":[66],"World":[67],"Model":[68],"(DEWM).":[69],"Each":[70],"decision":[72],"is":[73,90],"structured":[74,158],"into":[75],"an":[76],"Externalizable":[77],"Reasoning":[78],"Trace":[79],"(ERT),":[80],"comprising":[81],"action":[82],"proposal,":[83],"world":[84],"belief,":[85],"causal":[87],"assumption,":[88],"which":[89],"validated":[91],"against":[92],"DEWM":[93,135],"before":[94],"execution.":[95],"When":[96],"occur,":[98],"discrepancy":[99],"analysis":[100],"between":[101],"predicted":[102],"observed":[104],"states":[105],"enables":[106],"targeted":[107],"recovery":[108,124,144],"instead":[109],"of":[110],"global":[111],"replanning.":[112],"We":[113],"evaluate":[114],"VLM-DEWM":[115,163],"on":[116],"multi-station":[117],"assembly,":[118],"large-scale":[119],"facility":[120],"exploration,":[121],"real-robot":[123],"under":[125],"induced":[126],"failures.":[127],"Compared":[128],"baseline":[130],"memory-augmented":[131],"systems,":[133],"improves":[136],"state-tracking":[137],"accuracy":[138],"56%":[140],"93%,":[142],"increases":[143],"success":[145],"rate":[146],"below":[148],"5%":[149],"95%,":[151],"significantly":[153],"reduces":[154],"computational":[155],"overhead":[156],"memory.":[159],"These":[160],"results":[161],"establish":[162],"as":[164],"verifiable":[166],"resilient":[168],"solution":[169],"long-horizon":[171],"robotic":[172],"operations":[173],"manufacturing":[176],"environments.":[177]},"counts_by_year":[],"updated_date":"2026-02-19T06:31:58.851227","created_date":"2026-02-19T00:00:00"}
