{"id":"https://openalex.org/W7148489387","doi":"https://doi.org/10.48550/arxiv.2604.01001","title":"EgoSim: Egocentric World Simulator for Embodied Interaction Generation","display_name":"EgoSim: Egocentric World Simulator for Embodied Interaction Generation","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7148489387","doi":"https://doi.org/10.48550/arxiv.2604.01001"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.01001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.01001","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132807121","display_name":"Jinkun Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hao, Jinkun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099478071","display_name":"Mingda Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Mingda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132794415","display_name":"Ruiyan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ruiyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132799540","display_name":"Xihui Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xihui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132809726","display_name":"Ran Yi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi, Ran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093580225","display_name":"Lizhuang MA","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Lizhuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132799060","display_name":"Jiangmiao Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Jiangmiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132812593","display_name":"Xudong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xudong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5132807121"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.767799973487854,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.767799973487854,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.0940999984741211,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.03220000118017197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5976999998092651},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5077999830245972},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.49720001220703125},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4796999990940094},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4609000086784363},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4075999855995178},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.40299999713897705}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7775999903678894},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5976999998092651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5652999877929688},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5077999830245972},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48100000619888306},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4609000086784363},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4438999891281128},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.40299999713897705},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.3840000033378601},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3831000030040741},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.376800000667572},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.01001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.01001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,65,122],"introduce":[1,124],"EgoSim,":[2],"a":[3,70,103,126],"closed-loop":[4],"egocentric":[5,26,120],"world":[6,48,63],"simulator":[7],"that":[8,106,129,141],"generates":[9],"spatially":[10],"consistent":[11],"interaction":[12],"videos":[13],"and":[14,113,154,159,171],"persistently":[15],"updates":[16],"the":[17,41,86,92],"underlying":[18],"3D":[19,31,59],"scene":[20,42],"state":[21],"for":[22],"continuous":[23],"simulation.":[24],"Existing":[25],"simulators":[27],"either":[28],"lack":[29],"explicit":[30],"grounding,":[32],"causing":[33],"structural":[34],"drift":[35],"under":[36],"viewpoint":[37],"changes,":[38],"or":[39],"treat":[40],"as":[43,61],"static,":[44],"failing":[45],"to":[46,156,167],"update":[47],"states":[49],"across":[50],"multi-stage":[51],"interactions.":[52],"EgoSim":[53,142],"addresses":[54],"both":[55],"limitations":[56],"by":[57,91],"modeling":[58],"scenes":[60,158],"updatable":[62],"states.":[64],"generate":[66],"embodiment":[67,114],"interactions":[68],"via":[69],"Geometry-action-aware":[71],"Observation":[72],"Simulation":[73],"model,":[74],"with":[75,135],"spatial":[76,152],"consistency":[77],"from":[78,116],"an":[79],"Interaction-aware":[80],"State":[81],"Updating":[82],"module.":[83],"To":[84],"overcome":[85],"critical":[87],"data":[88,133],"bottleneck":[89],"posed":[90],"difficulty":[93],"in":[94,147],"acquiring":[95],"densely":[96],"aligned":[97],"scene-interaction":[98],"training":[99],"pairs,":[100],"we":[101],"design":[102],"scalable":[104],"pipeline":[105],"extracts":[107],"static":[108],"point":[109],"clouds,":[110],"camera":[111],"trajectories,":[112],"actions":[115],"in-the-wild":[117,160],"large-scale":[118],"monocular":[119],"videos.":[121],"further":[123],"EgoCap,":[125],"capture":[127],"system":[128],"enables":[130],"low-cost":[131],"real-world":[132],"collection":[134],"uncalibrated":[136],"smartphones.":[137],"Extensive":[138],"experiments":[139],"demonstrate":[140],"significantly":[143],"outperforms":[144],"existing":[145],"methods":[146],"terms":[148],"of":[149],"visual":[150],"quality,":[151],"consistency,":[153],"generalization":[155],"complex":[157],"dexterous":[161],"interactions,":[162],"while":[163],"supporting":[164],"cross-embodiment":[165],"transfer":[166],"robotic":[168],"manipulation.":[169],"Codes":[170],"datasets":[172],"will":[173],"be":[174],"open":[175],"soon.":[176],"The":[177],"project":[178],"page":[179],"is":[180],"at":[181],"egosimulator.github.io.":[182]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
