{"id":"https://openalex.org/W7161249739","doi":"https://doi.org/10.48550/arxiv.2605.15153","title":"Pelican-Unify 1.0: A Unified Embodied Intelligence Model for Understanding, Reasoning, Imagination and Action","display_name":"Pelican-Unify 1.0: A Unified Embodied Intelligence Model for Understanding, Reasoning, Imagination and Action","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161249739","doi":"https://doi.org/10.48550/arxiv.2605.15153"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.15153","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136272230","display_name":"Yi Zhang","orcid":"https://orcid.org/0009-0004-1185-0198"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136265219","display_name":"Yinda Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yinda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136221117","display_name":"Che Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Che","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057983414","display_name":"Zeyuan Ding","orcid":"https://orcid.org/0000-0002-4969-6200"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Zeyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136205423","display_name":"Jin; id_orcid 0009-0004-1045-6690 Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106452106","display_name":"Shilong Zou","orcid":"https://orcid.org/0009-0004-1124-3830"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Shilong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136225587","display_name":"Junwei Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Junwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136260495","display_name":"Jiayu Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Jiayu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136193326","display_name":"Xiancong Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Xiancong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136263671","display_name":"Xiaopeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaopeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136222205","display_name":"Yechi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yechi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136252284","display_name":"Haoyuan Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Haoyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121873825","display_name":"Zecong Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Zecong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136227266","display_name":"Haosong Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Haosong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136269019","display_name":"Renwen Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Renwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136217146","display_name":"Kuishu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Kuishu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013297655","display_name":"Wenhai Liu","orcid":"https://orcid.org/0000-0003-0166-7774"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wenhai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136224513","display_name":"Yang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002797727","display_name":"Yingji Zhang","orcid":"https://orcid.org/0000-0003-1499-3309"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yingji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136199230","display_name":"Yidong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yidong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136251074","display_name":"Senkang Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Senkang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136206231","display_name":"Jinpeng Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Jinpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047180180","display_name":"Nga Teng Chan","orcid":"https://orcid.org/0009-0002-3092-6974"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chan, Nga Teng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061782180","display_name":"Yechen Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yechen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136260697","display_name":"Yong Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zeting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136233083","display_name":"Jian Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Xianzhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136206327","display_name":"Xiaozhu Ju","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Yong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tang, Jian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Ju, Xiaozhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ju, Xiaozhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":29,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.505299985408783,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.505299985408783,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1054999977350235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.09269999712705612,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unification","display_name":"Unification","score":0.73089998960495},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7020999789237976},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.6567000150680542},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.48980000615119934},{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.41920000314712524},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4018999934196472},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.3874000012874603},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.349700003862381}],"concepts":[{"id":"https://openalex.org/C96146094","wikidata":"https://www.wikidata.org/wiki/Q609057","display_name":"Unification","level":2,"score":0.73089998960495},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7020999789237976},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.6567000150680542},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6564000248908997},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5931000113487244},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.48980000615119934},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.4198000133037567},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.41920000314712524},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4018999934196472},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.3490999937057495},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C137002209","wikidata":"https://www.wikidata.org/wiki/Q898521","display_name":"Hidden variable theory","level":3,"score":0.30809998512268066},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3018999993801117},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.28859999775886536},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.27219998836517334},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.26840001344680786},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26159998774528503},{"id":"https://openalex.org/C193856179","wikidata":"https://www.wikidata.org/wiki/Q5251100","display_name":"Defeasible estate","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25609999895095825},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2549000084400177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"Pelican-Unify":[2,16,149],"1.0,":[3],"the":[4,12,66,100,114,118,163,177,188],"first":[5],"embodied":[6],"foundation":[7],"model":[8,119],"trained":[9],"according":[10],"to":[11,120],"principle":[13],"of":[14,57],"unification.":[15],"1.0":[17,150],"uses":[18],"a":[19,23,36,46,60,71,146],"single":[20,61,147],"VLM":[21,42,161],"as":[22,45],"unified":[24,47,189],"understanding":[25],"module,":[26,49],"mapping":[27],"scenes,":[28],"instructions,":[29],"visual":[30],"contexts,":[31],"and":[32,54,64,86,91,107,126,173,201],"action":[33,108,127,182,202],"histories":[34],"into":[35,70,113,203],"shared":[37,115],"semantic":[38],"space.":[39],"The":[40,104],"same":[41,101],"also":[43],"serves":[44],"reasoning":[48],"autoregressively":[50],"producing":[51],"task-,":[52],"action-,":[53],"future-oriented":[55],"chains":[56],"thought":[58],"in":[59,192],"forward":[62],"pass":[63],"projecting":[65],"final":[67],"hidden":[68],"state":[69],"dense":[72],"latent":[73,84],"variable.":[74],"A":[75],"Unified":[76],"Future":[77],"Generator":[78],"(UFG)":[79],"then":[80],"conditions":[81],"on":[82,159,169,175],"this":[83],"variable":[85],"jointly":[87,121],"generates":[88],"future":[89,92],"videos":[90],"actions":[93],"through":[94],"two":[95],"modality-specific":[96],"output":[97],"heads":[98],"within":[99],"denoising":[102],"process.":[103],"language,":[105],"video,":[106],"losses":[109],"are":[110],"all":[111,155],"backpropagated":[112],"representation,":[116],"enabling":[117],"optimize":[122],"understanding,":[123,198],"reasoning,":[124,199],"imagination,":[125,200],"during":[128],"training,":[129],"rather":[130],"than":[131],"training":[132],"three":[133,156],"isolated":[134],"expert":[135],"systems.":[136],"Experiments":[137],"demonstrate":[138],"that":[139,187],"unification":[140],"does":[141],"not":[142],"imply":[143],"compromise.":[144],"With":[145],"checkpoint,":[148],"achieves":[151],"strong":[152],"performance":[153],"across":[154],"capabilities:":[157],"64.7":[158],"eight":[160],"benchmarks,":[162],"best":[164],"among":[165,180],"comparable-scale":[166],"models;":[167],"66.03":[168],"WorldArena,":[170],"ranking":[171],"first;":[172],"93.5":[174],"RoboTwin,":[176],"second-best":[178],"average":[179],"compared":[181],"methods.":[183],"These":[184],"results":[185],"show":[186],"paradigm":[190],"succeeds":[191],"preserving":[193],"specialist":[194],"strength":[195],"while":[196],"bringing":[197],"one":[204],"model.":[205]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
