{"id":"https://openalex.org/W7131471832","doi":"https://doi.org/10.48550/arxiv.2602.20057","title":"AdaWorldPolicy: World-Model-Driven Diffusion Policy with Online Adaptive Learning for Robotic Manipulation","display_name":"AdaWorldPolicy: World-Model-Driven Diffusion Policy with Online Adaptive Learning for Robotic Manipulation","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131471832","doi":"https://doi.org/10.48550/arxiv.2602.20057"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.20057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.20057","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126730790","display_name":"Ge Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuan, Ge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120065511","display_name":"Qiyuan Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Qiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126848854","display_name":"Jing Jing Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126842426","display_name":"Dong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Dong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5126730790"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8449000120162964,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8449000120162964,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.014999999664723873,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modularity","display_name":"Modularity (biology)","score":0.6304000020027161},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.4546000063419342},{"id":"https://openalex.org/keywords/adaptive-learning","display_name":"Adaptive learning","score":0.4413999915122986},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4221000075340271},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4156000018119812},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4081999957561493},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4059000015258789},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.396699994802475},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.39649999141693115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.650600016117096},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.6304000020027161},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.535099983215332},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.4413999915122986},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4221000075340271},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4156000018119812},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4081999957561493},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4059000015258789},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.39649999141693115},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3752000033855438},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3409999907016754},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33379998803138733},{"id":"https://openalex.org/C2780626000","wikidata":"https://www.wikidata.org/wiki/Q5936775","display_name":"Human-in-the-loop","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.31540000438690186},{"id":"https://openalex.org/C52970973","wikidata":"https://www.wikidata.org/wiki/Q2497134","display_name":"Adaptive system","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.30410000681877136},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C200331156","wikidata":"https://www.wikidata.org/wiki/Q506041","display_name":"Jacobian matrix and determinant","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.263700008392334},{"id":"https://openalex.org/C2777851325","wikidata":"https://www.wikidata.org/wiki/Q7094102","display_name":"Online model","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.25200000405311584},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.2513999938964844},{"id":"https://openalex.org/C2779136372","wikidata":"https://www.wikidata.org/wiki/Q10283002","display_name":"Information flow","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.20057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.20057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Effective":[0,15],"robotic":[1,16,48],"manipulation":[2,17,49],"requires":[3,18],"policies":[4,19],"that":[5,20,61,139,166],"can":[6,21,76],"anticipate":[7,22],"physical":[8,23,172],"outcomes":[9,24],"and":[10,25,96,147,171,183],"adapt":[11,26],"to":[12,27,46,82,152,168,195],"real-world":[13,28],"environments.":[14,29],"In":[30],"this":[31],"work,":[32],"we":[33],"introduce":[34],"a":[35,90,97,132,148,162,179],"unified":[36],"framework,":[37],"World-Model-Driven":[38],"Diffusion":[39,105],"Policy":[40],"with":[41,53,175,191],"Online":[42,134],"Adaptive":[43,135],"Learning":[44,136],"(AdaWorldPolicy)":[45],"enhance":[47],"under":[50],"dynamic":[51,73,84],"conditions":[52],"minimal":[54,176],"human":[55],"involvement.":[56],"Our":[57,87],"core":[58],"insight":[59],"is":[60],"world":[62,91],"models":[63],"provide":[64],"strong":[65],"supervision":[66],"signals,":[67],"enabling":[68,116],"online":[69],"adaptive":[70,193],"learning":[71,122],"in":[72],"environments,":[74],"which":[75],"be":[77],"complemented":[78],"by":[79],"force-torque":[80],"feedback":[81],"mitigate":[83],"force":[85,98],"shifts.":[86],"AdaWorldPolicy":[88,187],"integrates":[89],"model,":[92],"an":[93,143],"action":[94],"expert,":[95],"predictor-all":[99],"implemented":[100],"as":[101],"interconnected":[102,110],"Flow":[103],"Matching":[104],"Transformers":[106],"(DiT).":[107],"They":[108],"are":[109],"via":[111],"the":[112],"multi-modal":[113],"self-attention":[114],"layers,":[115],"deep":[117],"feature":[118],"exchange":[119],"for":[120],"joint":[121],"while":[123],"preserving":[124],"their":[125],"distinct":[126],"modularity":[127],"characteristics.":[128],"We":[129],"further":[130],"propose":[131],"novel":[133],"(AdaOL)":[137],"strategy":[138],"dynamically":[140],"switches":[141],"between":[142],"Action":[144],"Generation":[145],"mode":[146,151],"Future":[149],"Imagination":[150],"drive":[153],"reactive":[154],"updates":[155],"across":[156],"all":[157],"three":[158],"modules.":[159],"This":[160],"creates":[161],"powerful":[163],"closed-loop":[164],"mechanism":[165],"adapts":[167],"both":[169],"visual":[170],"domain":[173],"shifts":[174],"overhead.":[177],"Across":[178],"suite":[180],"of":[181],"simulated":[182],"real-robot":[184],"benchmarks,":[185],"our":[186],"achieves":[188],"state-of-the-art":[189],"performance,":[190],"dynamical":[192],"capacity":[194],"out-of-distribution":[196],"scenarios.":[197]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
