{"id":"https://openalex.org/W4405090109","doi":"https://doi.org/10.1109/iccv51701.2025.02176","title":"Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations","display_name":"Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4405090109","doi":"https://doi.org/10.1109/iccv51701.2025.02176"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02176","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.03215","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024680186","display_name":"Marcin Przewi\u0119\u017alikowski","orcid":"https://orcid.org/0000-0003-4772-3268"},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Marcin Przewiezlikowski","raw_affiliation_strings":["Jagiellonian University,Faculty of Mathematics and Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jagiellonian University,Faculty of Mathematics and Computer Science","institution_ids":["https://openalex.org/I126596746"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047293370","display_name":"Randall Balestriero","orcid":"https://orcid.org/0000-0002-5692-4187"},"institutions":[{"id":"https://openalex.org/I175594653","display_name":"John Brown University","ror":"https://ror.org/02ct41q97","country_code":"US","type":"education","lineage":["https://openalex.org/I175594653"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Randall Balestriero","raw_affiliation_strings":["Brown University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brown University","institution_ids":["https://openalex.org/I175594653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045836228","display_name":"W. K. Jasi\u0144ski","orcid":null},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Wojciech Jasinski","raw_affiliation_strings":["Jagiellonian University,Faculty of Mathematics and Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jagiellonian University,Faculty of Mathematics and Computer Science","institution_ids":["https://openalex.org/I126596746"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089509620","display_name":"Marek \u015amieja","orcid":"https://orcid.org/0000-0003-2027-4132"},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Marek Smieja","raw_affiliation_strings":["Jagiellonian University,Faculty of Mathematics and Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jagiellonian University,Faculty of Mathematics and Computer Science","institution_ids":["https://openalex.org/I126596746"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058095594","display_name":"Bartosz Zieli\u0144ski","orcid":"https://orcid.org/0000-0002-3063-3621"},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Bartosz Zielinski","raw_affiliation_strings":["Jagiellonian University,Faculty of Mathematics and Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jagiellonian University,Faculty of Mathematics and Computer Science","institution_ids":["https://openalex.org/I126596746"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024680186"],"corresponding_institution_ids":["https://openalex.org/I126596746"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00212585,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"23442","last_page":"23452"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.8348000049591064,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.8348000049591064,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.7509999871253967,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.724399983882904,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cls-upper-limits","display_name":"CLs upper limits","score":0.7682560682296753},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6411428451538086},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43694883584976196},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3615415096282959},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.34547242522239685},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.09894105792045593},{"id":"https://openalex.org/keywords/optometry","display_name":"Optometry","score":0.091644287109375}],"concepts":[{"id":"https://openalex.org/C190729725","wikidata":"https://www.wikidata.org/wiki/Q5012817","display_name":"CLs upper limits","level":2,"score":0.7682560682296753},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6411428451538086},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43694883584976196},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3615415096282959},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34547242522239685},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.09894105792045593},{"id":"https://openalex.org/C119767625","wikidata":"https://www.wikidata.org/wiki/Q618211","display_name":"Optometry","level":1,"score":0.091644287109375}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/iccv51701.2025.02176","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.03215","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.03215","pdf_url":"https://arxiv.org/pdf/2412.03215","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"pmh:oai:ruj.uj.edu.pl:item/572010","is_oa":true,"landing_page_url":"https://openaccess.thecvf.com/content/ICCV2025/html/Przewiezlikowski_Beyond_cls_Exploring_the_True_Potential_of_Masked_Image_Modeling_ICCV_2025_paper.html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400316","display_name":"Homo Politicus (Academy of Humanities and Economics in Lodz)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publikacja pokonferencyjna"},{"id":"doi:10.48550/arxiv.2412.03215","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.03215","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.03215","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.03215","pdf_url":"https://arxiv.org/pdf/2412.03215","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320326815","display_name":"Infrastruktura PL-Grid","ror":null},{"id":"https://openalex.org/F4320337160","display_name":"Academic Computer Centre Cyfronet, AGH University of Science and Technology","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Masked":[0],"Image":[1],"Modeling":[2],"(MIM)":[3],"has":[4],"emerged":[5],"as":[6],"a":[7],"promising":[8],"approach":[9],"for":[10,38,68],"Self-Supervised":[11],"Learning":[12],"(SSL)":[13],"of":[14,21,41,54,73,141],"visual":[15],"representations.":[16],"However,":[17],"the":[18,36,51,66,69,113,127,138],"out-of-the-box":[19,71,139],"performance":[20,72,140],"MIMs":[22,100],"is":[23,57,65,86,101],"typically":[24],"inferior":[25],"to":[26,35,78,89,109,124],"competing":[27],"approaches.":[28],"Most":[29],"users":[30],"cannot":[31],"afford":[32],"fine-tuning":[33],"due":[34,77,88],"need":[37],"large":[39],"amounts":[40],"data,":[42],"high":[43],"GPU":[44],"consumption,":[45],"and":[46],"specialized":[47],"user":[48],"knowledge.":[49],"Therefore,":[50],"practical":[52],"use":[53],"MIM":[55,83],"representations":[56],"limited.":[58],"In":[59],"this":[60,118],"paper":[61],"we":[62,95,120],"ask":[63],"what":[64],"reason":[67],"poor":[70],"MIMs.":[74],"Is":[75],"it":[76,87],"weaker":[79],"features":[80],"produced":[81],"by":[82,112],"models,":[84],"or":[85],"suboptimal":[90],"usage?":[91],"Through":[92],"detailed":[93],"analysis,":[94],"show":[96],"that":[97],"attention":[98],"in":[99,132],"spread":[102],"almost":[103],"uniformly":[104],"over":[105],"many":[106],"patches,":[107],"leading":[108],"ineffective":[110],"aggregation":[111],"[cls]":[114],"token.":[115],"Based":[116],"on":[117],"insight,":[119],"propose":[121],"Selective":[122],"Aggregation":[123],"better":[125],"capture":[126],"rich":[128],"semantic":[129],"information":[130],"retained":[131],"patch":[133],"tokens,":[134],"which":[135],"significantly":[136],"improves":[137],"MIM.":[142]},"counts_by_year":[],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
