{"id":"https://openalex.org/W7138893360","doi":"https://doi.org/10.48550/arxiv.2603.15975","title":"UMO: Unified In-Context Learning Unlocks Motion Foundation Model Priors","display_name":"UMO: Unified In-Context Learning Unlocks Motion Foundation Model Priors","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138893360","doi":"https://doi.org/10.48550/arxiv.2603.15975"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15975","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15975","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15975","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130030337","display_name":"Xiaoyan Cong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cong, Xiaoyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130004434","display_name":"Zekun Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zekun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130124248","display_name":"Zhiyang Dou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Zhiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129757083","display_name":"Hongyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111226930","display_name":"Omid Taheri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taheri, Omid","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130102546","display_name":"Chuan Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Chuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129854526","display_name":"Abhay Mittal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mittal, Abhay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101691659","display_name":"Sizhe An","orcid":"https://orcid.org/0000-0002-9211-4886"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Sizhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129971421","display_name":"Taku Komura","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Komura, Taku","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018010391","display_name":"Wojciech Matusik","orcid":"https://orcid.org/0000-0003-0212-5643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matusik, Wojciech","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130054122","display_name":"Michael J. Black","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Black, Michael J.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129918780","display_name":"Srinath Sridhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sridhar, Srinath","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5130030337"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4471000134944916,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4471000134944916,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.24220000207424164,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09019999951124191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.8093000054359436},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6697999835014343},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5871000289916992},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.535099983215332},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.489300012588501},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4535999894142151},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4237000048160553}],"concepts":[{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.8093000054359436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7337999939918518},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6697999835014343},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5871000289916992},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5605000257492065},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.535099983215332},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.489300012588501},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46470001339912415},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4535999894142151},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.29750001430511475},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2565999925136566},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15975","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15975","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15975","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15975","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"foundation":[1],"models":[2],"(LFMs)":[3],"have":[4],"recently":[5],"made":[6],"impressive":[7],"progress":[8],"in":[9,41,65],"text-to-motion":[10,39,175],"generation":[11,48,86],"by":[12],"learning":[13],"strong":[14],"generative":[15,59,123],"priors":[16,60,77,124],"from":[17],"massive":[18],"3D":[19],"human":[20],"motion":[21,36,47,85,128,187],"datasets":[22],"and":[23,31,45,142,192,203,219],"paired":[24],"text":[25],"descriptions.":[26],"However,":[27],"how":[28],"to":[29,61,74,78,120,138,147,160,174,177],"effectively":[30],"efficiently":[32],"leverage":[33],"such":[34,76],"single-purpose":[35],"LFMs,":[37],"i.e.,":[38],"synthesis,":[40],"more":[42],"diverse":[43,108,179],"cross-modal":[44],"in-context":[46,118,149],"downstream":[49,63,84,109],"tasks":[50,64,87,110],"remains":[51],"largely":[52],"unclear.":[53],"Prior":[54],"work":[55],"typically":[56],"adapts":[57],"pretrained":[58,126,153,170],"individual":[62],"a":[66,80,89,100,207,214],"task-specific":[67,202],"manner.":[68],"In":[69],"contrast,":[70],"our":[71],"goal":[72],"is":[73],"unlock":[75,121],"support":[79,178],"broad":[81],"spectrum":[82],"of":[83,113,125,210],"within":[88],"single":[90,215],"unified":[91,104,216],"framework.":[92],"To":[93],"bridge":[94],"this":[95,165],"gap,":[96],"we":[97],"present":[98],"UMO,":[99],"simple":[101],"yet":[102],"general":[103],"formulation":[105],"that":[106,198],"casts":[107],"into":[111,151],"compositions":[112],"atomic":[114],"per-frame":[115,140],"operations,":[116],"enabling":[117],"adaptation":[119],"the":[122,152,161,169],"DiT-based":[127],"LFMs.":[129],"Specifically,":[130],"UMO":[131,167,199],"introduces":[132],"three":[133],"learnable":[134],"frame-level":[135],"meta-operation":[136],"embeddings":[137],"specify":[139],"intent":[141],"employs":[143],"lightweight":[144],"temporal":[145,184],"fusion":[146],"inject":[148],"cues":[150],"backbone,":[154],"with":[155],"negligible":[156],"runtime":[157],"overhead":[158],"compared":[159],"base":[162],"model.":[163,217],"With":[164],"design,":[166],"finetunes":[168],"model,":[171],"originally":[172],"limited":[173],"generation,":[176],"previously":[180],"unsupported":[181],"tasks,":[182],"including":[183],"inpainting,":[185],"text-guided":[186],"editing,":[188],"text-serialized":[189],"geometric":[190],"constraints,":[191],"multi-identity":[193],"reaction":[194],"generation.":[195],"Experiments":[196],"demonstrate":[197],"consistently":[200],"outperforms":[201],"training-free":[204],"baselines":[205],"across":[206],"wide":[208],"range":[209],"benchmarks,":[211],"despite":[212],"using":[213],"Code":[218],"model":[220],"will":[221],"be":[222],"publicly":[223],"available.":[224],"Project":[225],"Page:":[226],"https://oliver-cong02.github.io/UMO.github.io/":[227]},"counts_by_year":[],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-03-20T00:00:00"}
