{"id":"https://openalex.org/W4413157708","doi":"https://doi.org/10.1109/cvpr52734.2025.01865","title":"From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling","display_name":"From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413157708","doi":"https://doi.org/10.1109/cvpr52734.2025.01865"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.01865","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01865","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022596397","display_name":"Jin\u2010Hong Lin","orcid":"https://orcid.org/0000-0002-7000-9540"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jinhong Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018211952","display_name":"Cheng\u2010En Wu","orcid":"https://orcid.org/0000-0002-3732-0759"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng-En Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014205687","display_name":"Huanran Li","orcid":"https://orcid.org/0000-0002-9929-607X"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huanran Li","raw_affiliation_strings":["University of Wisconsin&#x2013;Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin&#x2013;Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080108539","display_name":"Jifan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jifan Zhang","raw_affiliation_strings":["University of Wisconsin&#x2013;Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin&#x2013;Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101816319","display_name":"Yu Hu","orcid":"https://orcid.org/0000-0002-6302-3539"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Hen Hu","raw_affiliation_strings":["University of Wisconsin&#x2013;Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin&#x2013;Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009992645","display_name":"Pedro Morgado","orcid":"https://orcid.org/0000-0003-3880-3258"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pedro Morgado","raw_affiliation_strings":["University of Wisconsin&#x2013;Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin&#x2013;Madison","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5022596397"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31174273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"20028","last_page":"20038"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9243999719619751,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9243999719619751,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6738901734352112},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45919573307037354},{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.4538974165916443},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4093450903892517},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.34750622510910034},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.08066850900650024}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6738901734352112},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45919573307037354},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.4538974165916443},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4093450903892517},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.34750622510910034},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.08066850900650024},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.01865","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01865","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W1977295328","https://openalex.org/W2017814585","https://openalex.org/W2047643928","https://openalex.org/W2051224630","https://openalex.org/W2108598243","https://openalex.org/W2155904486","https://openalex.org/W2533598788","https://openalex.org/W2963420272","https://openalex.org/W3035524453","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W4238292595","https://openalex.org/W4308503280","https://openalex.org/W4312262772","https://openalex.org/W4312685069","https://openalex.org/W4312788538","https://openalex.org/W4312804044","https://openalex.org/W4313156423","https://openalex.org/W4386057769","https://openalex.org/W4389104669","https://openalex.org/W4402726986","https://openalex.org/W4403049035","https://openalex.org/W4404024989"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Masked":[0,150],"Image":[1],"Modeling":[2],"(MIM)":[3],"has":[4,36],"emerged":[5],"as":[6],"a":[7,79,104,164,172],"powerful":[8],"self-supervised":[9,168],"learning":[10,82,87,119,131],"paradigm":[11],"for":[12],"visual":[13,21,70,169],"representation":[14,139],"learning,":[15,170],"enabling":[16,114],"models":[17,56],"to":[18,59,89,94,148,175],"acquire":[19],"rich":[20],"representations":[22],"by":[23,48],"predicting":[24],"masked":[25],"portions":[26],"of":[27,160],"images":[28],"from":[29,64,91],"their":[30],"visible":[31],"regions.":[32],"While":[33],"this":[34,75],"approach":[35,102],"shown":[37],"promising":[38],"results,":[39],"we":[40,77,126],"hypothesize":[41],"that":[42,84,108,128,155],"its":[43],"effectiveness":[44],"may":[45],"be":[46],"limited":[47],"optimization":[49,178],"challenges":[50,179],"during":[51],"early":[52],"training":[53,112,136,145,161],"stages,":[54],"where":[55],"are":[57],"expected":[58],"learn":[60],"complex":[61,96],"image":[62],"distributions":[63],"partial":[65],"observations":[66],"before":[67],"developing":[68],"basic":[69],"processing":[71],"capabilities.":[72],"To":[73],"address":[74],"limitation,":[76],"propose":[78],"prototype-driven":[80],"curriculum":[81,130],"framework":[83],"structures":[85],"the":[86,99,111,158,176],"process":[88],"progress":[90],"prototypical":[92],"examples":[93,162],"more":[95,115],"variations":[97],"in":[98,167,180],"dataset.":[100],"Our":[101,152],"introduces":[103],"temperature-based":[105],"annealing":[106],"scheme":[107],"gradually":[109],"expands":[110],"distribution,":[113],"stable":[116],"and":[117,138],"efficient":[118],"trajectories.":[120],"Through":[121],"extensive":[122],"experiments":[123],"on":[124],"ImageNet-1K,":[125],"demonstrate":[127],"our":[129],"strategy":[132],"significantly":[133],"improves":[134],"both":[135],"efficiency":[137],"quality":[140],"while":[141],"requiring":[142],"substantially":[143],"fewer":[144],"epochs":[146],"compared":[147],"standard":[149],"Auto-Encoding.":[151],"findings":[153],"suggest":[154],"carefully":[156],"controlling":[157],"order":[159],"plays":[163],"crucial":[165],"role":[166],"providing":[171],"practical":[173],"solution":[174],"early-stage":[177],"MIM.":[181]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
