{"id":"https://openalex.org/W7138201220","doi":"https://doi.org/10.1609/aaai.v40i8.37595","title":"ManipDreamer3D: Synthesizing Plausible Robotic Manipulation Video with Occupancy-aware 3D Trajectory","display_name":"ManipDreamer3D: Synthesizing Plausible Robotic Manipulation Video with Occupancy-aware 3D Trajectory","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138201220","doi":"https://doi.org/10.1609/aaai.v40i8.37595"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i8.37595","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37595","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37595/41557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37595/41557","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129729763","display_name":"Ying Li","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Ying Li","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University","State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University\nHong Kong University of Science and Technology\nAutonomous Driving Development, NIO"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University\nHong Kong University of Science and Technology\nAutonomous Driving Development, NIO","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129650073","display_name":"Xiaobao Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobao Wei","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nAutonomous Driving Development, NIO"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nAutonomous Driving Development, NIO","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129664654","display_name":"Xiaowei Chi","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiaowei Chi","raw_affiliation_strings":["Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129712594","display_name":"Yuming Li","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Yuming Li","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University","State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University\nHong Kong University of Science and Technology\nAutonomous Driving Development, NIO"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University\nHong Kong University of Science and Technology\nAutonomous Driving Development, NIO","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102507240","display_name":"Zhongyu Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongyu Zhao","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University\nSchool of Software and Microelectronics, Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129641638","display_name":"Hao Henry Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704984","display_name":"Ningning Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I31466659","display_name":"Chapingo Autonomous University","ror":"https://ror.org/04ctjby61","country_code":"MX","type":"education","lineage":["https://openalex.org/I31466659"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Ningning Ma","raw_affiliation_strings":["Autonomous Driving Development, NIO"],"affiliations":[{"raw_affiliation_string":"Autonomous Driving Development, NIO","institution_ids":["https://openalex.org/I31466659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129709628","display_name":"Ming Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Lu","raw_affiliation_strings":["State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129744190","display_name":"Sirui Han","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Sirui Han","raw_affiliation_strings":["Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5129729763"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.65909091,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"8","first_page":"6644","last_page":"6652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6987000107765198,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6987000107765198,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.061400000005960464,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.03460000082850456,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.656499981880188},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6365000009536743},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5497999787330627},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5332000255584717},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5098000168800354},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4968000054359436},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4465000033378601},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3707999885082245},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.3443000018596649}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7378000020980835},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7200999855995178},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6894999742507935},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.656499981880188},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6365000009536743},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5497999787330627},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5332000255584717},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5098000168800354},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4968000054359436},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4465000033378601},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3707999885082245},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3443000018596649},{"id":"https://openalex.org/C10912380","wikidata":"https://www.wikidata.org/wiki/Q527952","display_name":"Visual servoing","level":3,"score":0.3230000138282776},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.3181999921798706},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.2851000130176544},{"id":"https://openalex.org/C77660652","wikidata":"https://www.wikidata.org/wiki/Q150971","display_name":"Computer graphics","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C50637493","wikidata":"https://www.wikidata.org/wiki/Q1136781","display_name":"Morphing","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C2775960376","wikidata":"https://www.wikidata.org/wiki/Q1435859","display_name":"Grippers","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C23903533","wikidata":"https://www.wikidata.org/wiki/Q17122739","display_name":"Reprojection error","level":3,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i8.37595","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37595","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37595/41557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i8.37595","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37595","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37595/41557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138201220.pdf","grobid_xml":"https://content.openalex.org/works/W7138201220.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"scarcity":[1],"continues":[2],"to":[3,17,30,148,173],"be":[4],"a":[5,27,71,97,104,109,144],"critical":[6],"bottleneck":[7],"in":[8],"the":[9,15,40,83,87,118,123,153,160],"field":[10],"of":[11,42],"robotic":[12,33,79,175],"manipulation,":[13],"limiting":[14],"ability":[16],"train":[18],"robust":[19],"and":[20,44,86,126,139,159,198],"generalizable":[21],"models.":[22],"While":[23],"diffusion":[24,112,171],"models":[25],"provide":[26],"promising":[28],"approach":[29],"synthesizing":[31],"realistic":[32],"manipulation":[34,80],"videos,":[35],"their":[36],"effectiveness":[37],"hinges":[38],"on":[39,52],"availability":[41],"precise":[43],"reasonable":[45],"control":[46],"instructions.":[47],"Current":[48],"methods":[49],"primarily":[50],"rely":[51],"2D":[53],"trajectories":[54],"as":[55],"instruction":[56,158],"prompts,":[57],"which":[58],"inherently":[59],"face":[60],"issues":[61],"with":[62,96,108],"3D":[63,93,99,119,131,162,189],"spatial":[64],"ambiguity.":[65],"In":[66],"this":[67],"work,":[68],"we":[69,142],"present":[70],"novel":[72,110],"framework":[73],"named":[74],"ManipDreamer3Dfor":[75],"generating":[76],"plausible":[77,188],"3D-aware":[78],"videos":[81],"from":[82,103,122,152],"input":[84,124],"image":[85,125,155],"text":[88,157],"instruction.":[89],"Our":[90,178],"method":[91,179],"combines":[92],"trajectory":[94],"planning":[95],"reconstructed":[98],"occupancy":[100,120],"map":[101],"created":[102],"third-person":[105],"perspective,":[106],"along":[107],"trajectory-to-video":[111,170],"model.":[113],"Specifically,":[114],"ManipDreamer3D":[115],"first":[116],"reconstructs":[117],"representation":[121],"then":[127],"computes":[128],"an":[129],"optimized":[130,161],"end-effector":[132],"trajectory,":[133],"minimizing":[134],"path":[135],"length,":[136],"avoiding":[137],"collisions":[138],"retiming.":[140],"Next,":[141],"employ":[143],"latent":[145],"editing":[146],"technique":[147],"create":[149],"video":[150],"sequences":[151],"initial":[154],"latent,":[156],"trajectory.":[163],"This":[164],"process":[165],"conditions":[166],"our":[167],"specially":[168],"trained":[169],"model":[172],"produce":[174],"pick-and-place":[176],"videos.":[177],"significantly":[180],"reduces":[181],"human":[182],"intervention":[183],"requirements":[184],"by":[185],"autonomously":[186],"planing":[187],"trajectories.":[190],"Experimental":[191],"results":[192],"demonstrate":[193],"its":[194],"superior":[195],"visual":[196],"quality":[197],"precision.":[199]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
