{"id":"https://openalex.org/W4415537396","doi":"https://doi.org/10.1145/3746027.3754880","title":"Building Embodied EvoAgent: A Brain-inspired Paradigm for Bridging Multimodal Large Models and World Models","display_name":"Building Embodied EvoAgent: A Brain-inspired Paradigm for Bridging Multimodal Large Models and World Models","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415537396","doi":"https://doi.org/10.1145/3746027.3754880"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754880","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754880","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014526931","display_name":"Junyu Gao","orcid":"https://orcid.org/0000-0002-8105-5497"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junyu Gao","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8105-5497","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101501874","display_name":"Xuan Yao","orcid":"https://orcid.org/0009-0000-8115-3954"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Yao","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-8115-3954","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100728762","display_name":"Yong Rui","orcid":"https://orcid.org/0000-0002-9142-5914"},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Rui","raw_affiliation_strings":["Lenovo Research, Lenovo Group Ltd., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9142-5914","affiliations":[{"raw_affiliation_string":"Lenovo Research, Lenovo Group Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210156165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022636178","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China, School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China, and Peng Cheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8343-9665","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China, School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China, and Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210100255"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014526931"],"corresponding_institution_ids":["https://openalex.org/I4210100255"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28654362,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3280","last_page":"3289"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9527000188827515,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.8894000053405762},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6104000210762024},{"id":"https://openalex.org/keywords/cognitive-robotics","display_name":"Cognitive robotics","score":0.5760999917984009},{"id":"https://openalex.org/keywords/embodied-agent","display_name":"Embodied agent","score":0.5687000155448914},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5501000285148621},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4496999979019165},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4472000002861023},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.37869998812675476},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.36550000309944153}],"concepts":[{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.8894000053405762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6855000257492065},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6104000210762024},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5990999937057495},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.5760999917984009},{"id":"https://openalex.org/C103683099","wikidata":"https://www.wikidata.org/wiki/Q5370102","display_name":"Embodied agent","level":3,"score":0.5687000155448914},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5501000285148621},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4966999888420105},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4496999979019165},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.37869998812675476},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.357699990272522},{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C43540301","wikidata":"https://www.wikidata.org/wiki/Q689971","display_name":"Paradigm shift","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C2777371692","wikidata":"https://www.wikidata.org/wiki/Q2178611","display_name":"Spatial cognition","level":3,"score":0.26019999384880066},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754880","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754880","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W4226052928","https://openalex.org/W4226376247","https://openalex.org/W4386076428","https://openalex.org/W4386076672","https://openalex.org/W4390872465","https://openalex.org/W4402703032","https://openalex.org/W4402778069"],"related_works":[],"abstract_inverted_index":{"Embodied":[0],"artificial":[1],"intelligence":[2,59],"has":[3],"rapidly":[4],"developed":[5],"under":[6],"the":[7,84,88,94,118,126,137,149,156,164,176,180,195,201,217,222],"impetus":[8],"of":[9,87,93,125,163,179,224,230],"multimodal":[10,66],"learning,":[11],"robotics,":[12],"and":[13,24,36,47,76,80,90,103,121,133,159,170,194,232,244],"cognitive":[14],"science,":[15],"demonstrating":[16],"great":[17],"potential":[18],"in":[19,34,72,227,237],"fields":[20],"such":[21,42],"as":[22,43],"navigation":[23],"manipulation.":[25],"However,":[26],"building":[27],"embodied":[28,58,107,113,225,241],"agents":[29,226],"that":[30,216],"can":[31],"robustly":[32],"operate":[33],"diverse":[35],"dynamic":[37,78,185,207],"environments":[38,75,208],"still":[39],"faces":[40],"challenges,":[41],"handling":[44],"partial":[45],"observability":[46],"environmental":[48,168],"adaptability.":[49],"Multimodal":[50],"large":[51],"language":[52,119],"models":[53],"(MLLMs)":[54],"are":[55],"vital":[56],"for":[57,106,130,188],"due":[60],"to":[61,64,116,154,203,206],"their":[62,234],"ability":[63,236],"process":[65],"information,":[67],"but":[68],"they":[69],"encounter":[70],"difficulties":[71],"understanding":[73,131],"spatial":[74,157],"performing":[77],"decisions":[79],"evolution.":[81,246],"Inspired":[82],"by":[83],"functional":[85],"specialization":[86],"left":[89,127],"right":[91,165],"hemispheres":[92],"human":[95],"brain,":[96],"this":[97],"paper":[98],"proposes":[99],"a":[100,142,228],"brain-inspired":[101],"learning":[102],"evolution":[104],"paradigm":[105,219],"agents.":[108],"The":[109],"method":[110],"designs":[111],"an":[112],"context-augmented":[114],"MLLM":[115],"simulate":[117,155],"processing":[120],"logical":[122],"analysis":[123],"capabilities":[124],"hemisphere,":[128,166],"responsible":[129],"instructions":[132],"visual":[134],"scenes.":[135],"At":[136],"same":[138],"time,":[139],"it":[140],"constructs":[141],"perceptual":[143],"context-guided":[144],"world":[145,196],"model":[146,153],"based":[147],"on":[148],"recurrent":[150],"state":[151],"space":[152],"perception":[158],"holistic":[160],"thinking":[161],"functions":[162],"capturing":[167],"dynamics":[169],"predicting":[171],"future":[172],"states.":[173],"By":[174],"simulating":[175],"communication":[177,186],"function":[178],"corpus":[181],"callosum,":[182],"we":[183],"propose":[184],"slots":[187],"efficient":[189],"information":[190],"exchange":[191],"between":[192],"MLLMs":[193],"model,":[197],"which":[198],"also":[199],"allows":[200],"agent":[202],"quickly":[204],"adapt":[205],"without":[209],"requiring":[210],"extensive":[211],"computational":[212],"resources.":[213],"Experiments":[214],"show":[215],"proposed":[218],"significantly":[220],"improves":[221],"performance":[223],"series":[229],"tasks":[231,239],"enhances":[233],"generalization":[235],"zero-shot":[238],"through":[240],"exploration":[242],"experience":[243],"online":[245],"Our":[247],"project":[248],"page":[249],"is":[250],"available":[251],"at":[252],"https://feliciaxyao.github.io/EvoAgent/.":[253]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-25T00:00:00"}
