{"id":"https://openalex.org/W7137920515","doi":"https://doi.org/10.48550/arxiv.2603.14145","title":"MMOU: A Massive Multi-Task Omni Understanding and Reasoning Benchmark for Long and Complex Real-World Videos","display_name":"MMOU: A Massive Multi-Task Omni Understanding and Reasoning Benchmark for Long and Complex Real-World Videos","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137920515","doi":"https://doi.org/10.48550/arxiv.2603.14145"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.14145","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14145","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.14145","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031240854","display_name":"Arushi Goel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goel, Arushi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129663109","display_name":"Sreyan Ghosh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghosh, Sreyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029424334","display_name":"Vatsal Agarwal","orcid":"https://orcid.org/0009-0004-9470-198X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Vatsal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076391965","display_name":"Nishit Anand","orcid":"https://orcid.org/0000-0001-9391-1871"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anand, Nishit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056483046","display_name":"K. S. Jayakumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jayakumar, Kaousheik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129675521","display_name":"Lasha Koroshinadze","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koroshinadze, Lasha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129711632","display_name":"Yao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129646781","display_name":"Katie Lyons","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyons, Katie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089849858","display_name":"James L. Case","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Case, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060354987","display_name":"Karan Sapra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sapra, Karan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016107863","display_name":"Kevin J. Shih","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shih, Kevin J.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006331697","display_name":"Siddharth Gururani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gururani, Siddharth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129671601","display_name":"Abhinav Shrivastava","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shrivastava, Abhinav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013222310","display_name":"Ramani Duraiswami","orcid":"https://orcid.org/0000-0002-5596-8460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duraiswami, Ramani","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129679639","display_name":"Dinesh Manocha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manocha, Dinesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129717379","display_name":"Andrew Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Andrew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129714666","display_name":"Bryan Catanzaro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Catanzaro, Bryan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129717427","display_name":"Mohammad Shoeybi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shoeybi, Mohammad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129678277","display_name":"Wei Ping","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ping, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7700999975204468,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7700999975204468,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.05469999834895134,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.03750000149011612,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8482999801635742},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6525999903678894},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.5485000014305115},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.41819998621940613},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.3589000105857849},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.2777999937534332}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8482999801635742},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7828999757766724},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6525999903678894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5527999997138977},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.5485000014305115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4341000020503998},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3589000105857849},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2935999929904938},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.14145","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14145","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.14145","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14145","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8552941679954529,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"have":[5],"shown":[6],"strong":[7],"performance":[8,132],"in":[9,16,31,170],"visual":[10],"and":[11,28,33,51,76,98,114,122,182,187],"audio":[12],"understanding":[13,50],"when":[14],"evaluated":[15],"isolation.":[17],"However,":[18],"their":[19],"ability":[20],"to":[21,46,165],"jointly":[22],"reason":[23],"over":[24],"omni-modal":[25,157],"(visual,":[26],"audio,":[27],"textual)":[29],"signals":[30],"long":[32,171],"complex":[34],"videos":[35,69],"remains":[36],"largely":[37],"unexplored.":[38],"We":[39,117],"introduce":[40],"MMOU,":[41],"a":[42],"new":[43],"benchmark":[44,84],"designed":[45],"systematically":[47],"evaluate":[48,118],"multimodal":[49,124],"reasoning":[52,115],"under":[53],"these":[54],"challenging,":[55],"real-world":[56],"conditions.":[57],"MMOU":[58],"consists":[59],"of":[60,70,91,155],"20,000":[61],"carefully":[62],"curated":[63],"questions":[64,101],"paired":[65],"with":[66],"11877":[67],"web-collected":[68],"varying":[71],"length,":[72],"spanning":[73],"diverse":[74],"domains":[75],"exhibiting":[77],"rich,":[78],"tightly":[79],"coupled":[80],"audio-visual":[81],"content.":[82],"The":[83,128],"covers":[85],"13":[86],"fundamental":[87,168],"skill":[88],"categories,":[89],"all":[90],"which":[92],"require":[93],"integrating":[94],"evidence":[95],"across":[96,105],"modalities":[97],"time.":[99],"All":[100],"are":[102],"manually":[103],"annotated":[104],"multiple":[106],"turns":[107],"by":[108],"professional":[109],"annotators,":[110],"ensuring":[111],"high":[112],"quality":[113],"fidelity.":[116],"20+":[119],"state-of-the-art":[120],"open-source":[121,145],"proprietary":[123],"models":[125,162,190],"on":[126],"MMOU.":[127],"results":[129,151],"expose":[130],"substantial":[131],"gaps:":[133],"the":[134,143,153],"best":[135],"closed-source":[136],"model":[137,146],"achieves":[138],"only":[139],"64.2%":[140],"accuracy,":[141],"while":[142],"strongest":[144],"reaches":[147],"just":[148],"46.8%.":[149],"Our":[150],"highlight":[152],"challenges":[154],"long-form":[156],"understanding,":[158],"revealing":[159],"that":[160],"current":[161,189],"frequently":[163],"fail":[164],"apply":[166],"even":[167],"skills":[169],"videos.":[172],"Through":[173],"detailed":[174],"analysis,":[175],"we":[176],"further":[177],"identify":[178],"systematic":[179],"failure":[180],"modes":[181],"provide":[183],"insights":[184],"into":[185],"where":[186],"why":[188],"break.":[191]},"counts_by_year":[],"updated_date":"2026-06-24T06:17:17.354583","created_date":"2026-03-18T00:00:00"}
