{"id":"https://openalex.org/W7130576655","doi":"https://doi.org/10.48550/arxiv.2602.16229","title":"Factored Latent Action World Models","display_name":"Factored Latent Action World Models","publication_year":2026,"publication_date":"2026-02-18","ids":{"openalex":"https://openalex.org/W7130576655","doi":"https://doi.org/10.48550/arxiv.2602.16229"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.16229","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16229","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.16229","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126367125","display_name":"Zizhao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Zizhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071717542","display_name":"Chang Zheng Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Chang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126366401","display_name":"Jiaheng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Jiaheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031730756","display_name":"Kevin Rohling","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rohling, Kevin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126446326","display_name":"Roberto Mart\u00edn-Mart\u00edn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mart\u00edn-Mart\u00edn, Roberto","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126380541","display_name":"Amy Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Amy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126385645","display_name":"Peter Stone","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stone, Peter","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5126367125"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.6606000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.6606000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.17720000445842743,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.04450000077486038,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6482999920845032},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5343000292778015},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.3677000105381012},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.36739999055862427},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3443000018596649},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.32330000400543213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6934999823570251},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6482999920845032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5882999897003174},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5343000292778015},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4921000003814697},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C65965080","wikidata":"https://www.wikidata.org/wiki/Q1806885","display_name":"Latent variable model","level":3,"score":0.304500013589859},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.27970001101493835},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.2556999921798706},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.16229","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16229","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.16229","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16229","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Learning":[0],"latent":[1,49,90,156],"actions":[2,20],"from":[3],"action-free":[4,116],"video":[5,112,117],"has":[6],"emerged":[7],"as":[8],"a":[9,22,47,75],"powerful":[10],"paradigm":[11],"for":[12,25],"scaling":[13],"up":[14],"controllable":[15],"world":[16],"model":[17],"learning.":[18],"Latent":[19,71],"provide":[21],"natural":[23],"interface":[24],"users":[26],"to":[27,51,120],"iteratively":[28],"generate":[29],"and":[30,41,56,92,110,129,143,146],"manipulate":[31],"videos.":[32],"However,":[33],"most":[34],"existing":[35],"approaches":[36],"rely":[37],"on":[38,124,126],"monolithic":[39,121],"inverse":[40],"forward":[42],"dynamics":[43,77,109],"models":[44],"that":[45,79,135],"learn":[46],"single":[48],"action":[50,91,157],"control":[52],"the":[53,81,152],"entire":[54],"scene,":[55],"therefore":[57],"struggle":[58],"in":[59,115,140],"complex":[60,107],"environments":[61],"where":[62],"multiple":[63],"entities":[64],"act":[65],"simultaneously.":[66],"This":[67,99],"paper":[68],"introduces":[69],"Factored":[70],"Action":[72],"Model":[73],"(FLAM),":[74],"factored":[76],"framework":[78],"decomposes":[80],"scene":[82],"into":[83],"independent":[84],"factors,":[85],"each":[86],"inferring":[87],"its":[88,94],"own":[89,95],"predicting":[93],"next-step":[96],"factor":[97],"value.":[98],"factorized":[100,155],"structure":[101],"enables":[102],"more":[103],"accurate":[104],"modeling":[105],"of":[106,154],"multi-entity":[108,131],"improves":[111],"generation":[113],"quality":[114],"settings":[118],"compared":[119],"models.":[122,158],"Based":[123],"experiments":[125],"both":[127],"simulation":[128],"real-world":[130],"datasets,":[132],"we":[133],"find":[134],"FLAM":[136],"outperforms":[137],"prior":[138],"work":[139],"prediction":[141],"accuracy":[142],"representation":[144],"quality,":[145],"facilitates":[147],"downstream":[148],"policy":[149],"learning,":[150],"demonstrating":[151],"benefits":[153]},"counts_by_year":[],"updated_date":"2026-02-20T06:18:38.638704","created_date":"2026-02-20T00:00:00"}
