{"id":"https://openalex.org/W4392873997","doi":"https://doi.org/10.48550/arxiv.2403.09611","title":"MM1: Methods, Analysis &amp; Insights from Multimodal LLM Pre-training","display_name":"MM1: Methods, Analysis &amp; Insights from Multimodal LLM Pre-training","publication_year":2024,"publication_date":"2024-03-14","ids":{"openalex":"https://openalex.org/W4392873997","doi":"https://doi.org/10.48550/arxiv.2403.09611"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.09611","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.09611","pdf_url":"https://arxiv.org/pdf/2403.09611","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.09611","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047462684","display_name":"Brandon McKinzie","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"McKinzie, Brandon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066666034","display_name":"Zhe Gan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Zhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041899005","display_name":"Jean-Philippe Fauconnier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fauconnier, Jean-Philippe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094170435","display_name":"Sam Dodge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dodge, Sam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101722358","display_name":"Bowen Zhang","orcid":"https://orcid.org/0000-0001-9982-2968"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Bowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012225484","display_name":"Philipp Dufter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dufter, Philipp","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114136682","display_name":"Dhruti Shah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shah, Dhruti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017048711","display_name":"Xianzhi Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xianzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113144371","display_name":"Futang Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Futang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048093554","display_name":"Floris Weers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weers, Floris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094170433","display_name":"Anton Belyi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Belyi, Anton","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100392968","display_name":"Haotian Zhang","orcid":"https://orcid.org/0000-0003-0844-3730"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haotian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101580804","display_name":"K. Singh","orcid":"https://orcid.org/0000-0002-3184-3225"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Karanjeet","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111148382","display_name":"Doug Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Doug","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jain, Ankur","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jain, Ankur","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113144370","display_name":"Hongyu H\u00e8","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H\u00e8, Hongyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015580523","display_name":"Max Schwarzer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwarzer, Max","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019477718","display_name":"Tom Gunter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gunter, Tom","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100688402","display_name":"Xiang Kong","orcid":"https://orcid.org/0009-0006-3158-1495"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047876033","display_name":"Aonan Zhang","orcid":"https://orcid.org/0000-0002-6310-4769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Aonan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115597626","display_name":"Jianyu Wang","orcid":"https://orcid.org/0009-0000-3617-8562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115591208","display_name":"Chong Wang","orcid":"https://orcid.org/0009-0007-4414-5147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005166456","display_name":"Nan Du","orcid":"https://orcid.org/0000-0002-2728-917X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Nan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100707254","display_name":"Tao Lei","orcid":"https://orcid.org/0000-0002-3010-8085"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029332910","display_name":"Sam M. Wiseman","orcid":"https://orcid.org/0000-0002-3160-4697"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wiseman, Sam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yin, Guoli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Guoli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100620072","display_name":"Mark Lee","orcid":"https://orcid.org/0000-0001-6643-4360"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Mark","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105989076","display_name":"Zirui Wang","orcid":"https://orcid.org/0009-0006-6976-9522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zirui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112901893","display_name":"Ruoming Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Ruoming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094170434","display_name":"Peter Grasch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grasch, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064265174","display_name":"Alexander Toshev","orcid":"https://orcid.org/0000-0003-0925-638X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toshev, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5083263708","display_name":"Yinfei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yinfei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":32,"corresponding_author_ids":["https://openalex.org/A5047462684"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5755407214164734},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4282534718513489},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3388529121875763},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.12943723797798157}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5755407214164734},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4282534718513489},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3388529121875763},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12943723797798157},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.09611","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.09611","pdf_url":"https://arxiv.org/pdf/2403.09611","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.09611","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.09611","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.09611","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.09611","pdf_url":"https://arxiv.org/pdf/2403.09611","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W2358668433","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2997094352"],"abstract_inverted_index":{"In":[0,12],"this":[1],"work,":[2],"we":[3,14,43,51,87,121],"discuss":[4],"building":[5],"performant":[6],"Multimodal":[7],"Large":[8],"Language":[9],"Models":[10],"(MLLMs).":[11],"particular,":[13],"study":[15],"the":[16,31,34,90,98,106,118],"importance":[17],"of":[18,30,62,111,126,157],"various":[19,39],"architecture":[20],"components":[21],"and":[22,27,38,66,97,137,147,174],"data":[23,41,68],"choices.":[24],"Through":[25],"careful":[26,60],"comprehensive":[28],"ablations":[29],"image":[32,91,95,99],"encoder,":[33],"vision":[35],"language":[36],"connector,":[37],"pre-training":[40,57,84,145],"choices,":[42],"identified":[44],"several":[45],"crucial":[46,70],"design":[47,109],"lessons.":[48],"For":[49],"example,":[50],"demonstrate":[52],"that":[53,89,141],"for":[54,71],"large-scale":[55,163],"multimodal":[56,127,159],"using":[58],"a":[59,124,155],"mix":[61],"image-caption,":[63],"interleaved":[64],"image-text,":[65],"text-only":[67],"is":[69,110],"achieving":[72],"state-of-the-art":[73],"(SOTA)":[74],"few-shot":[75,178],"results":[76],"across":[77],"multiple":[78],"benchmarks,":[79],"compared":[80],"to":[81,130,162],"other":[82],"published":[83],"results.":[85],"Further,":[86],"show":[88],"encoder":[92],"together":[93],"with":[94],"resolution":[96],"token":[100],"count":[101],"has":[102],"substantial":[103],"impact,":[104],"while":[105],"vision-language":[107],"connector":[108],"comparatively":[112],"negligible":[113],"importance.":[114],"By":[115],"scaling":[116],"up":[117,129],"presented":[119],"recipe,":[120],"build":[122],"MM1,":[123],"family":[125],"models":[128,136],"30B":[131],"parameters,":[132],"including":[133],"both":[134],"dense":[135],"mixture-of-experts":[138],"(MoE)":[139],"variants,":[140],"are":[142],"SOTA":[143],"in":[144],"metrics":[146],"achieve":[148],"competitive":[149],"performance":[150],"after":[151],"supervised":[152],"fine-tuning":[153],"on":[154],"range":[156],"established":[158],"benchmarks.":[160],"Thanks":[161],"pre-training,":[164],"MM1":[165],"enjoys":[166],"appealing":[167],"properties":[168],"such":[169],"as":[170],"enhanced":[171],"in-context":[172],"learning,":[173],"multi-image":[175],"reasoning,":[176],"enabling":[177],"chain-of-thought":[179],"prompting.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2024-03-16T00:00:00"}
