{"id":"https://openalex.org/W7123368220","doi":"https://doi.org/10.1109/lra.2026.3653273","title":"FlowDreamer: A RGB-D World Model With Flow-Based Motion Representations for Robot Manipulation","display_name":"FlowDreamer: A RGB-D World Model With Flow-Based Motion Representations for Robot Manipulation","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7123368220","doi":"https://doi.org/10.1109/lra.2026.3653273"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3653273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3653273","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122851442","display_name":"Jun Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jun Guo","raw_affiliation_strings":["State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122862310","display_name":"Xiaojian Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojian Ma","raw_affiliation_strings":["State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747441","display_name":"Yikai Wang","orcid":"https://orcid.org/0000-0003-1341-6235"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yikai Wang","raw_affiliation_strings":["School of Artificial Intelligence, Beijing Normal University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing Normal University, Beijing, China","institution_ids":["https://openalex.org/I25254941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122900946","display_name":"Min Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Yang","raw_affiliation_strings":["State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122875437","display_name":"Huaping Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaping Liu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122904001","display_name":"Qing Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Li","raw_affiliation_strings":["State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5122851442"],"corresponding_institution_ids":["https://openalex.org/I4210100255"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19093457,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"3","first_page":"2466","last_page":"2473"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.23970000445842743,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.23970000445842743,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.21610000729560852,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.16089999675750732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6873000264167786},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.6528000235557556},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.47369998693466187},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.46790000796318054},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4408999979496002},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.3950999975204468},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.3707999885082245}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7218999862670898},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7138000130653381},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6873000264167786},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6793000102043152},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.6528000235557556},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.47369998693466187},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4408999979496002},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.3707999885082245},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.30379998683929443},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3653273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3653273","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1578285471","https://openalex.org/W1901129140","https://openalex.org/W1980035368","https://openalex.org/W2098500213","https://openalex.org/W2114770744","https://openalex.org/W2133665775","https://openalex.org/W2528489519","https://openalex.org/W2799055999","https://openalex.org/W2962785568","https://openalex.org/W2963125977","https://openalex.org/W2963149945","https://openalex.org/W2989847975","https://openalex.org/W3186096360","https://openalex.org/W4214520160","https://openalex.org/W4312814625","https://openalex.org/W4312828807","https://openalex.org/W4312933868","https://openalex.org/W4313160783","https://openalex.org/W4385430679","https://openalex.org/W4386076323","https://openalex.org/W4391759936","https://openalex.org/W4400582084","https://openalex.org/W4409147637","https://openalex.org/W4413145386","https://openalex.org/W4413155399","https://openalex.org/W4415798746","https://openalex.org/W7103752023"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"investigates":[2],"training":[3],"better":[4,132],"visual":[5,17,55,123],"world":[6,30,38,139],"models":[7,12,31,140],"for":[8],"robot":[9,25,157],"manipulation,":[10],"i.e.,":[11],"that":[13,32,45,129],"can":[14],"predict":[15,95],"future":[16,97],"observations":[18],"by":[19,141],"conditioning":[20],"on":[21,34,114,143,147,152],"past":[22,81],"frames":[23,36],"and":[24,51,83,89,122,150],"actions.":[26],"Specifically,":[27],"we":[28,61],"consider":[29],"operate":[33],"RGB-D":[35,138],"(RGB-D":[37],"models).":[39],"As":[40],"opposed":[41],"to":[42,135],"canonical":[43],"approaches":[44],"handle":[46],"dynamics":[47],"prediction":[48,121],"mostly":[49],"implicitly":[50],"reconcile":[52],"it":[53],"with":[54,86],"rendering":[56],"in":[57,155],"a":[58,87,91],"single":[59],"model,":[60],"introduce":[62],"FlowDreamer,":[63],"which":[64],"adopts":[65],"3D":[66,76],"scene":[67,77,101],"flow":[68,78],"as":[69],"explicit":[70],"motion":[71],"representations.":[72],"FlowDreamer":[73,103,130],"first":[74],"predicts":[75],"from":[79],"the":[80,96,100],"frame":[82,98],"action":[84],"conditions":[85],"U-Net,":[88],"then":[90],"diffusion":[92],"model":[93],"will":[94],"utilizing":[99],"flow.":[102],"is":[104],"trained":[105],"end-to-end":[106],"despite":[107],"its":[108],"modularized":[109],"nature.":[110],"We":[111],"conduct":[112],"experiments":[113],"4":[115],"different":[116],"benchmarks,":[117],"covering":[118],"both":[119],"video":[120],"planning":[124],"tasks.":[125],"The":[126],"results":[127],"demonstrate":[128],"achieves":[131],"performance":[133],"compared":[134],"other":[136],"baseline":[137],"7%":[142],"semantic":[144],"similarity,":[145],"11%":[146],"pixel":[148],"quality,":[149],"6%":[151],"success":[153],"rate":[154],"various":[156],"manipulation":[158],"domains.":[159]},"counts_by_year":[],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2026-01-14T00:00:00"}
