{"id":"https://openalex.org/W7139013557","doi":"https://doi.org/10.48550/arxiv.2603.17240","title":"GigaWorld-Policy: An Efficient Action-Centered World--Action Model","display_name":"GigaWorld-Policy: An Efficient Action-Centered World--Action Model","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7139013557","doi":"https://doi.org/10.48550/arxiv.2603.17240"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17240","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17240","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17240","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130101174","display_name":"Angen Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ye, Angen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130088672","display_name":"Boyuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Boyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125901580","display_name":"Chaojun Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ni, Chaojun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129811001","display_name":"Guan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Guan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130129245","display_name":"Guosheng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Guosheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130133485","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111111965","display_name":"Hengtao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hengtao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130163456","display_name":"Jie Li (15030)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040551371","display_name":"Jindi Lv","orcid":"https://orcid.org/0000-0001-9071-5728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Jindi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130060046","display_name":"Jingyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jingyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129943856","display_name":"Min Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129973681","display_name":"Peng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129773876","display_name":"Qiuping Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Qiuping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074966640","display_name":"Wenjun Mei","orcid":"https://orcid.org/0000-0001-9575-1496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mei, Wenjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130000044","display_name":"Xiaofeng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaofeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129780346","display_name":"Xinze Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xinze","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129965893","display_name":"Xinyu Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130101808","display_name":"Yang Wang (5921)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010151006","display_name":"Yifan Chang","orcid":"https://orcid.org/0000-0003-2820-0840"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129893615","display_name":"Yifan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129957710","display_name":"Yukun Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yukun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129799503","display_name":"Yun Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Yun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129955954","display_name":"Zhichao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zhichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130201424","display_name":"Zheng Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Zheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":24,"corresponding_author_ids":["https://openalex.org/A5130101174"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2572999894618988,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2572999894618988,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.18629999458789825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12219999730587006,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7305999994277954},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.685699999332428},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6496999859809875},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6197999715805054},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5504999756813049},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.47209998965263367},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4325000047683716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7972999811172485},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7305999994277954},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.685699999332428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6567999720573425},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6496999859809875},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6197999715805054},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5504999756813049},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5033000111579895},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.47209998965263367},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4325000047683716},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3479999899864197},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3310999870300293},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.31540000438690186},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.305400013923645},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17240","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17240","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17240","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17240","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"World-Action":[0],"Models":[1],"(WAM)":[2],"initialized":[3],"from":[4,154],"pre-trained":[5],"video":[6,63,87,132,187],"generation":[7,160,188],"backbones":[8],"have":[9],"demonstrated":[10],"remarkable":[11],"potential":[12],"for":[13,197],"robot":[14,181,198],"policy":[15,92,124,199],"learning.":[16,200],"However,":[17],"existing":[18],"approaches":[19],"face":[20],"two":[21,95],"critical":[22],"bottlenecks":[23],"that":[24,75,150,208],"hinder":[25],"performance":[26,232],"and":[27,36,49,109,119,131,138],"deployment.":[28,171],"First,":[29],"jointly":[30],"reasoning":[31],"over":[32],"future":[33,62,101,112],"visual":[34,48],"dynamics":[35,79],"corresponding":[37],"actions":[38,118,142],"incurs":[39],"substantial":[40],"inference":[41,164],"overhead.":[42],"Second,":[43],"joint":[44],"modeling":[45],"often":[46],"entangles":[47],"motion":[50,53],"representations,":[51],"making":[52],"prediction":[54,130,169],"accuracy":[55],"heavily":[56],"dependent":[57],"on":[58,105,115,203,235],"the":[59,98,106,116,120,195,214],"quality":[60],"of":[61],"forecasts.":[64],"To":[65,172],"address":[66],"these":[67],"issues,":[68],"we":[69,90,176],"introduce":[70],"GigaWorld-Policy,":[71],"an":[72,185],"action-centered":[73,186],"WAM":[74,216],"learns":[76],"2D":[77],"pixel-action":[78],"while":[80,219],"enabling":[81],"efficient":[82],"action":[83,102,129,156,168],"decoding,":[84],"with":[85,228],"optional":[86,162],"generation.":[88],"Specifically,":[89],"formulate":[91],"training":[93],"into":[94],"coupled":[96],"components:":[97],"model":[99],"predicts":[100],"sequences":[103],"conditioned":[104,114],"current":[107],"observation,":[108],"simultaneously":[110],"generates":[111],"videos":[113],"predicted":[117],"same":[121],"observation.":[122],"The":[123],"is":[125,161,191],"supervised":[126],"by":[127,224,233],"both":[128],"generation,":[133],"providing":[134],"richer":[135],"learning":[136],"signals":[137],"encouraging":[139],"physically":[140],"plausible":[141],"through":[143],"visual-dynamics":[144],"constraints.":[145],"With":[146],"a":[147,178],"causal":[148],"design":[149],"prevents":[151],"future-video":[152,159],"tokens":[153],"influencing":[155],"tokens,":[157],"explicit":[158],"at":[163],"time,":[165],"allowing":[166],"faster":[167,212],"during":[170],"support":[173],"this":[174],"paradigm,":[175],"curate":[177],"diverse,":[179],"large-scale":[180],"dataset":[182],"to":[183],"pre-train":[184],"model,":[189],"which":[190],"then":[192],"adapted":[193],"as":[194],"backbone":[196],"Experimental":[201],"results":[202],"real-world":[204],"robotic":[205],"platforms":[206],"show":[207],"GigaWorld-Policy":[209,230],"runs":[210],"9x":[211],"than":[213],"leading":[215],"baseline,":[217],"Motus,":[218],"improving":[220],"task":[221],"success":[222],"rates":[223],"7%.":[225],"Moreover,":[226],"compared":[227],"pi-0.5,":[229],"improves":[231],"95%":[234],"RoboTwin":[236],"2.0.":[237]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
