{"id":"https://openalex.org/W4360892048","doi":"https://doi.org/10.48550/arxiv.2303.13496","title":"The effectiveness of MAE pre-pretraining for billion-scale pretraining","display_name":"The effectiveness of MAE pre-pretraining for billion-scale pretraining","publication_year":2023,"publication_date":"2023-03-23","ids":{"openalex":"https://openalex.org/W4360892048","doi":"https://doi.org/10.48550/arxiv.2303.13496"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2303.13496","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13496","pdf_url":"https://arxiv.org/pdf/2303.13496","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2303.13496","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060023106","display_name":"Mannat Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Singh, Mannat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073033067","display_name":"Quentin Duval","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duval, Quentin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078659702","display_name":"Kalyan Vasudev Alwala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alwala, Kalyan Vasudev","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022792966","display_name":"Haoqi Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Haoqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101859535","display_name":"Vaibhav Aggarwal","orcid":"https://orcid.org/0000-0002-7504-3297"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aggarwal, Vaibhav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038119134","display_name":"Aaron Adcock","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adcock, Aaron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107859338","display_name":"Armand Joulin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joulin, Armand","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057866698","display_name":"Piotr Doll\u00e1r","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doll\u00e1r, Piotr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036069974","display_name":"Christoph Feichtenhofer","orcid":"https://orcid.org/0000-0001-9756-7238"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feichtenhofer, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049246408","display_name":"Ross Girshick","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Girshick, Ross","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006312307","display_name":"Rohit Girdhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Girdhar, Rohit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000623592","display_name":"Ishan Misra","orcid":"https://orcid.org/0000-0001-7708-7261"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Misra, Ishan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5060023106"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.7800329923629761},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7602133750915527},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6753156185150146},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6129587292671204},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5638492107391357},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.508735716342926},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.508042573928833},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5010685920715332},{"id":"https://openalex.org/keywords/one-shot","display_name":"One shot","score":0.41634097695350647}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.7800329923629761},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7602133750915527},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6753156185150146},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6129587292671204},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5638492107391357},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.508735716342926},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.508042573928833},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5010685920715332},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.41634097695350647},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2303.13496","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13496","pdf_url":"https://arxiv.org/pdf/2303.13496","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2303.13496","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2303.13496","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2303.13496","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13496","pdf_url":"https://arxiv.org/pdf/2303.13496","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"score":0.5799999833106995,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4360892048.pdf","grobid_xml":"https://content.openalex.org/works/W4360892048.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386121542","https://openalex.org/W2497720472","https://openalex.org/W4292659306","https://openalex.org/W3044321615","https://openalex.org/W4294892107","https://openalex.org/W2806221744","https://openalex.org/W2326937258","https://openalex.org/W394267150","https://openalex.org/W2773965352","https://openalex.org/W2357748469"],"abstract_inverted_index":{"This":[0],"paper":[1],"revisits":[2],"the":[3,42,48,59,69,72,99,103,128],"standard":[4],"pretrain-then-finetune":[5],"paradigm":[6],"used":[7],"in":[8],"computer":[9],"vision":[10],"for":[11,91,182],"visual":[12,135],"recognition":[13,136],"tasks.":[14],"Typically,":[15],"state-of-the-art":[16,155],"foundation":[17,93],"models":[18,191],"are":[19,192],"pretrained":[20],"using":[21],"large":[22],"scale":[23,57],"(weakly)":[24],"supervised":[25],"datasets":[26],"with":[27,58,68,82,185],"billions":[28,115,123,186],"of":[29,61,71,110,116,124,130,187],"images.":[30],"We":[31,126],"introduce":[32],"an":[33],"additional":[34],"pre-pretraining":[35,80,131],"stage":[36],"that":[37,65,174],"is":[38],"simple":[39],"and":[40,85,102,118,147,165,189],"uses":[41],"self-supervised":[43],"MAE":[44,51],"technique":[45],"to":[46,56,114,122],"initialize":[47],"model.":[49],"While":[50],"has":[52],"only":[53],"been":[54],"shown":[55],"size":[60,70,87],"models,":[62],"we":[63],"find":[64],"it":[66,89],"scales":[67,81,112],"training":[73,92],"dataset":[74,119],"as":[75],"well.":[76],"Thus,":[77],"our":[78,190],"MAE-based":[79],"both":[83,98],"model":[84,100,111,152,175],"data":[86],"making":[88],"applicable":[90],"models.":[94],"Pre-pretraining":[95],"consistently":[96],"improves":[97],"convergence":[101],"downstream":[104],"transfer":[105,167],"performance":[106],"across":[107],"a":[108,178],"range":[109],"(millions":[113,121],"parameters),":[117],"sizes":[120],"images).":[125],"measure":[127],"effectiveness":[129],"on":[132,157,168],"10":[133],"different":[134],"tasks":[137],"spanning":[138],"image":[139],"classification,":[140],"video":[141],"recognition,":[142],"object":[143],"detection,":[144],"low-shot":[145],"classification":[146],"zero-shot":[148,166],"recognition.":[149],"Our":[150,171],"largest":[151],"achieves":[153],"new":[154],"results":[156],"iNaturalist-18":[158],"(91.7%),":[159],"ImageNet-ReaL":[160],"(91.1%),":[161],"1-shot":[162],"ImageNet-1k":[163],"(63.6%),":[164],"Food-101":[169],"(96.2%).":[170],"study":[172],"reveals":[173],"initialization":[176],"plays":[177],"significant":[179],"role,":[180],"even":[181],"web-scale":[183],"pretraining":[184],"images,":[188],"available":[193],"publicly.":[194]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
