{"id":"https://openalex.org/W4409310592","doi":"https://doi.org/10.1109/tpami.2025.3557001","title":"Bootstrap Masked Visual Modeling via Hard Patch Mining","display_name":"Bootstrap Masked Visual Modeling via Hard Patch Mining","publication_year":2025,"publication_date":"2025-04-09","ids":{"openalex":"https://openalex.org/W4409310592","doi":"https://doi.org/10.1109/tpami.2025.3557001","pmid":"https://pubmed.ncbi.nlm.nih.gov/40202875"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3557001","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3557001","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102189536","display_name":"Haochen Wang","orcid":"https://orcid.org/0000-0002-2333-1844"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haochen Wang","raw_affiliation_strings":["New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050140194","display_name":"Junsong Fan","orcid":"https://orcid.org/0000-0001-6989-2711"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junsong Fan","raw_affiliation_strings":["New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100393131","display_name":"Yuxi Wang","orcid":"https://orcid.org/0000-0003-1579-2357"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxi Wang","raw_affiliation_strings":["New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018410836","display_name":"Kaiyou Song","orcid":"https://orcid.org/0000-0001-8999-2680"},"institutions":[{"id":"https://openalex.org/I4401726805","display_name":"Megvii (China)","ror":"https://ror.org/040b32p69","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726805"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiyou Song","raw_affiliation_strings":["Megvii Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Megvii Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726805"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110647238","display_name":"Tiancai Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726805","display_name":"Megvii (China)","ror":"https://ror.org/040b32p69","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726805"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tiancai Wang","raw_affiliation_strings":["Megvii Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Megvii Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726805"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100362465","display_name":"Xiangyu Zhang","orcid":"https://orcid.org/0000-0003-2138-4608"},"institutions":[{"id":"https://openalex.org/I4401726805","display_name":"Megvii (China)","ror":"https://ror.org/040b32p69","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726805"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zhang","raw_affiliation_strings":["Megvii Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Megvii Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726805"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028016065","display_name":"Zhaoxiang Zhang","orcid":"https://orcid.org/0000-0003-2648-3875"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxiang Zhang","raw_affiliation_strings":["New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"New Laboratory of Pattern Recognition, State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102189536"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":1.3104,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.7906207,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"47","issue":"8","first_page":"6200","last_page":"6214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9383000135421753,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6472087502479553},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5836460590362549},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4257052540779114},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38694125413894653},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33211344480514526}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6472087502479553},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5836460590362549},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4257052540779114},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38694125413894653},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33211344480514526}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3557001","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3557001","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40202875","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40202875","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":121,"referenced_works":["https://openalex.org/W219040644","https://openalex.org/W343636949","https://openalex.org/W1520997877","https://openalex.org/W1861492603","https://openalex.org/W1901129140","https://openalex.org/W2025768430","https://openalex.org/W2037227137","https://openalex.org/W2074466695","https://openalex.org/W2100495367","https://openalex.org/W2107726111","https://openalex.org/W2117539524","https://openalex.org/W2161969291","https://openalex.org/W2194775991","https://openalex.org/W2308529009","https://openalex.org/W2321533354","https://openalex.org/W2326925005","https://openalex.org/W2331143823","https://openalex.org/W2422305492","https://openalex.org/W2470142083","https://openalex.org/W2487442924","https://openalex.org/W2550462002","https://openalex.org/W2558661413","https://openalex.org/W2565639579","https://openalex.org/W2575671312","https://openalex.org/W2599837529","https://openalex.org/W2625366777","https://openalex.org/W2737258237","https://openalex.org/W2798991696","https://openalex.org/W2799087757","https://openalex.org/W2884822772","https://openalex.org/W2888101838","https://openalex.org/W2896457183","https://openalex.org/W2938260698","https://openalex.org/W2948242301","https://openalex.org/W2962770929","https://openalex.org/W2963091558","https://openalex.org/W2963113370","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2963420272","https://openalex.org/W2963426332","https://openalex.org/W2963516811","https://openalex.org/W2963814513","https://openalex.org/W2964037671","https://openalex.org/W2964700958","https://openalex.org/W2990503944","https://openalex.org/W2992308087","https://openalex.org/W3010874390","https://openalex.org/W3035524453","https://openalex.org/W3109908659","https://openalex.org/W3126721948","https://openalex.org/W3129110783","https://openalex.org/W3129409502","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3145385912","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W3170837227","https://openalex.org/W3171007011","https://openalex.org/W4214507759","https://openalex.org/W4214612132","https://openalex.org/W4214614183","https://openalex.org/W4214709605","https://openalex.org/W4221161877","https://openalex.org/W4254968059","https://openalex.org/W4308503280","https://openalex.org/W4312309398","https://openalex.org/W4312312750","https://openalex.org/W4312349930","https://openalex.org/W4312560592","https://openalex.org/W4312804044","https://openalex.org/W4313156423","https://openalex.org/W4382467347","https://openalex.org/W4385245566","https://openalex.org/W4386057769","https://openalex.org/W4386066407","https://openalex.org/W4386071576","https://openalex.org/W4386076203","https://openalex.org/W4386076377","https://openalex.org/W4386076509","https://openalex.org/W4386221015","https://openalex.org/W4390871850","https://openalex.org/W4390872366","https://openalex.org/W4390873627","https://openalex.org/W4390874131","https://openalex.org/W4394625636","https://openalex.org/W4403204221","https://openalex.org/W4404435992","https://openalex.org/W6677326919","https://openalex.org/W6681096077","https://openalex.org/W6691096134","https://openalex.org/W6713563955","https://openalex.org/W6726945602","https://openalex.org/W6739622702","https://openalex.org/W6745136726","https://openalex.org/W6757817989","https://openalex.org/W6778883912","https://openalex.org/W6779997284","https://openalex.org/W6784333009","https://openalex.org/W6791353385","https://openalex.org/W6796761347","https://openalex.org/W6797263693","https://openalex.org/W6804608498","https://openalex.org/W6810007534","https://openalex.org/W6810265253","https://openalex.org/W6810914850","https://openalex.org/W6838332116","https://openalex.org/W6838577202","https://openalex.org/W6838638105","https://openalex.org/W6838657836","https://openalex.org/W6838789689","https://openalex.org/W6839263979","https://openalex.org/W6839306587","https://openalex.org/W6843151239","https://openalex.org/W6843161697","https://openalex.org/W6844194202","https://openalex.org/W6852918553","https://openalex.org/W6855903128","https://openalex.org/W6856320488","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W2033914206"],"abstract_inverted_index":{"Masked":[0],"visual":[1],"modeling":[2],"has":[3],"attracted":[4],"much":[5],"attention":[6],"due":[7],"to":[8,20,38,71,79,130,146,149,185,196],"its":[9],"promising":[10],"potential":[11],"in":[12,73],"learning":[13],"generalizable":[14],"representations.":[15],"Typical":[16],"approaches":[17],"urge":[18],"models":[19],"predict":[21],"specific":[22],"contents":[23],"of":[24,58,76,90,191],"masked":[25,43],"tokens,":[26],"which":[27,139],"can":[28,93],"be":[29,94],"intuitively":[30],"considered":[31],"as":[32,96,114],"teaching":[33],"a":[34,77,106,115,143],"student":[35],"(the":[36,56],"model)":[37],"solve":[39],"given":[40],"problems":[41,82],"(predicting":[42],"contents).":[44],"Under":[45],"such":[46],"settings,":[47],"the":[48,69,74,112,156,179,189],"performance":[49],"is":[50,65,140,194],"highly":[51],"correlated":[52],"with":[53,87,142],"mask":[54,163],"strategies":[55],"difficulty":[57],"provided":[59],"problems).":[60],"We":[61],"argue":[62],"that":[63],"it":[64],"equally":[66],"important":[67],"for":[68],"model":[70,113],"stand":[72],"shoes":[75],"teacher":[78],"produce":[80],"challenging":[81],"by":[83],"itself.":[84],"Intuitively,":[85],"patches":[86,103],"high":[88],"values":[89],"reconstruction":[91,108],"loss":[92,137,151,181],"regarded":[95],"hard":[97,102,195],"samples,":[98],"and":[99,126,173],"masking":[100],"those":[101],"naturally":[104],"becomes":[105],"demanding":[107],"task.":[109],"To":[110,153],"empower":[111],"teacher,":[116],"we":[117,133,159],"propose":[118,160],"Hard":[119],"Patch":[120],"Mining":[121],"(HPM),":[122],"predicting":[123],"patch-wise":[124],"losses":[125],"subsequently":[127],"determining":[128,192],"where":[129,193],"mask.":[131],"Technically,":[132],"introduce":[134],"an":[135,161],"auxiliary":[136],"predictor,":[138],"trained":[141],"relative":[144],"objective":[145,183],"prevent":[147],"overfitting":[148],"exact":[150],"values.":[152],"gradually":[154],"guide":[155],"training":[157],"procedure,":[158],"easy-to-hard":[162],"strategy.":[164],"Empirically,":[165],"HPM":[166],"brings":[167],"significant":[168],"improvements":[169],"under":[170],"both":[171],"image":[172],"video":[174],"benchmarks.":[175],"Interestingly,":[176],"solely":[177],"incorporating":[178],"extra":[180],"prediction":[182],"leads":[184],"better":[186],"representations,":[187],"verifying":[188],"efficacy":[190],"reconstruct.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
