{"id":"https://openalex.org/W7160304585","doi":"https://doi.org/10.48550/arxiv.2605.02881","title":"MolmoAct2: Action Reasoning Models for Real-world Deployment","display_name":"MolmoAct2: Action Reasoning Models for Real-world Deployment","publication_year":2026,"publication_date":"2026-05-04","ids":{"openalex":"https://openalex.org/W7160304585","doi":"https://doi.org/10.48550/arxiv.2605.02881"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.02881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.02881","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085879597","display_name":"Haoquan Fang","orcid":"https://orcid.org/0009-0007-9759-3867"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Haoquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027184297","display_name":"Jiafei Duan","orcid":"https://orcid.org/0000-0003-4551-2424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Jiafei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120271016","display_name":"Donovan Clay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Clay, Donovan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128076726","display_name":"Sam Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135410975","display_name":"Shuo Liu","orcid":"https://orcid.org/0000-0002-0293-585X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104331008","display_name":"Weikai Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Weikai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135350979","display_name":"Xiang Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Xiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125489104","display_name":"Wei-Chuan Tsai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsai, Wei-Chuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135306203","display_name":"Shirui Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shirui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082014572","display_name":"Yi Ru Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yi Ru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122215773","display_name":"Shanli Xing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing, Shanli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135364871","display_name":"Jaemin Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cho, Jaemin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135373354","display_name":"Jae Sung Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jae Sung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004894291","display_name":"Ainaz Eftekhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eftekhar, Ainaz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135309215","display_name":"Peter Sushko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sushko, Peter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135371044","display_name":"Karen Farley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Farley, Karen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135292219","display_name":"Angad Wadhwa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wadhwa, Angad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126671977","display_name":"Cole Harrison","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harrison, Cole","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135303484","display_name":"Winson Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Winson","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125941368","display_name":"Ying-Chun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Ying-Chun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135348047","display_name":"Eli VanderBilt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"VanderBilt, Eli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135297838","display_name":"Rose Hendrix","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hendrix, Rose","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093398129","display_name":"Suveen Ellawela","orcid":"https://orcid.org/0009-0008-0269-6036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ellawela, Suveen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080590910","display_name":"Lucas Ngoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ngoo, Lucas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135403817","display_name":"Joyce Chai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chai, Joyce","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024951066","display_name":"Zhongzheng Ren","orcid":"https://orcid.org/0000-0003-1033-5341"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Zhongzheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135321925","display_name":"Ali Farhadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Farhadi, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135398273","display_name":"Dieter Fox","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fox, Dieter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135286835","display_name":"Ranjay Krishna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishna, Ranjay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":29,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8410000205039978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8410000205039978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.0333000011742115,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.028999999165534973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6793000102043152},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5347999930381775},{"id":"https://openalex.org/keywords/teleoperation","display_name":"Teleoperation","score":0.5171999931335449},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5167999863624573},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.40619999170303345},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.38449999690055847},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.3334999978542328},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3109999895095825},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.30979999899864197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7526999711990356},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6793000102043152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.628600001335144},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5347999930381775},{"id":"https://openalex.org/C161759796","wikidata":"https://www.wikidata.org/wiki/Q3982902","display_name":"Teleoperation","level":3,"score":0.5171999931335449},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5167999863624573},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40779998898506165},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.40619999170303345},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3357999920845032},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C132829578","wikidata":"https://www.wikidata.org/wiki/Q581151","display_name":"Situated","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.30090001225471497},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.29030001163482666},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.02881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.02881","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02881","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","score":0.4122520089149475,"id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2,26],"aim":[3],"to":[4,33,119,149,202],"provide":[5,130],"a":[6,58,77,88,92,151,156,187],"single":[7],"generalist":[8],"controller":[9],"for":[10,22,41,52,65,81,176],"robots,":[11],"but":[12],"today's":[13],"systems":[14],"fall":[15],"short":[16],"on":[17,87,138],"the":[18,50,114,147,193],"criteria":[19],"that":[20,112,171,179],"matter":[21],"real-world":[23,208],"deployment.":[24],"Frontier":[25],"are":[27,31],"closed,":[28],"open-weight":[29],"alternatives":[30],"tied":[32],"expensive":[34],"hardware,":[35],"reasoning-augmented":[36],"policies":[37],"pay":[38],"prohibitive":[39],"latency":[40],"their":[42],"grounding,":[43],"and":[44,83,126,207,220,234],"fine-tuned":[45],"success":[46],"rates":[47],"remain":[48],"below":[49],"threshold":[51],"dependable":[53],"use.":[54],"We":[55,74,95,129,145,228],"present":[56],"MolmoAct2,":[57],"fully":[59],"open":[60,116,200],"action":[61,135],"reasoning":[62,169],"model":[63,230],"built":[64],"practical":[66],"deployment,":[67],"advancing":[68],"its":[69],"predecessor":[70],"along":[71],"five":[72,143],"axes.":[73],"introduce":[75],"MolmoER,":[76],"VLM":[78,158],"backbone":[79],"specialized":[80],"spatial":[82],"embodied":[84],"reasoning,":[85],"trained":[86,137],"3.3M-sample":[89],"corpus":[90],"with":[91,122],"specialize-then-rehearse":[93],"recipe.":[94],"release":[96,229],"three":[97],"new":[98],"datasets":[99],"spanning":[100,204],"low-to-medium":[101],"cost":[102],"platforms,":[103],"including":[104,214],"MolmoAct2-BimanualYAM,":[105],"720":[106],"hours":[107],"of":[108,140,189,198],"teleoperated":[109],"bimanual":[110,117],"trajectories":[111,141],"constitute":[113],"largest":[115],"dataset":[118],"date,":[120,203],"together":[121],"quality-filtered":[123],"Franka":[124],"(DROID)":[125],"SO100/101":[127],"subsets.":[128],"OpenFAST,":[131],"an":[132,167],"open-weight,":[133],"open-data":[134],"tokenizer":[136],"millions":[139],"across":[142,224],"embodiments.":[144],"redesign":[146],"architecture":[148],"graft":[150],"flow-matching":[152],"continuous-action":[153],"expert":[154],"onto":[155],"discrete-token":[157],"via":[159],"per-layer":[160],"KV-cache":[161],"conditioning.":[162],"Finally,":[163],"we":[164],"propose":[165],"MolmoThink,":[166],"adaptive-depth":[168],"variant":[170],"re-predicts":[172],"depth":[173],"tokens":[174],"only":[175],"scene":[177],"regions":[178],"change":[180],"between":[181],"timesteps,":[182],"retaining":[183],"geometric":[184],"grounding":[185],"at":[186],"fraction":[188],"prior":[190],"latency.":[191],"In":[192],"most":[194],"extensive":[195],"empirical":[196],"study":[197],"any":[199],"VLA":[201],"7":[205],"simulation":[206],"benchmarks,":[209],"MolmoAct2":[210],"outperforms":[211],"strong":[212],"baselines":[213],"Pi-05,":[215],"while":[216],"MolmoER":[217],"surpasses":[218],"GPT-5":[219],"Gemini":[221],"Robotics":[222],"ER-1.5":[223],"13":[225],"embodied-reasoning":[226],"benchmarks.":[227],"weights,":[231],"training":[232,236],"code,":[233],"complete":[235],"data.":[237],"Project":[238],"page:":[239],"https://allenai.org/blog/molmoact2":[240]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
