{"id":"https://openalex.org/W4386076446","doi":"https://doi.org/10.1109/cvpr52729.2023.00211","title":"Masked Image Modeling with Local Multi-Scale Reconstruction","display_name":"Masked Image Modeling with Local Multi-Scale Reconstruction","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4386076446","doi":"https://doi.org/10.1109/cvpr52729.2023.00211"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52729.2023.00211","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00211","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102022499","display_name":"Haoqing Wang","orcid":"https://orcid.org/0000-0002-5575-848X"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Haoqing Wang","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038232518","display_name":"Yehui Tang","orcid":"https://orcid.org/0000-0002-0322-4283"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["CN","SE","US"],"is_corresponding":false,"raw_author_name":"Yehui Tang","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University","Huawei Noah's Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100727358","display_name":"Yunhe Wang","orcid":"https://orcid.org/0000-0002-2709-4946"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Yunhe Wang","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016185815","display_name":"Jianyuan Guo","orcid":"https://orcid.org/0000-0002-2981-1953"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Jianyuan Guo","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060171279","display_name":"Zhihong Deng","orcid":"https://orcid.org/0000-0003-0226-0626"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zhi-Hong Deng","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101784732","display_name":"Kai Han","orcid":"https://orcid.org/0000-0002-9761-2702"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Kai Han","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102022499"],"corresponding_institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":6.2506,"has_fulltext":false,"cited_by_count":55,"citation_normalized_percentile":{"value":0.97589749,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2122","last_page":"2131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7997342348098755},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6738916039466858},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6219163537025452},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6204138994216919},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.6151793003082275},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6073309183120728},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5918075442314148},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5743985176086426},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5408378839492798},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5164209008216858},{"id":"https://openalex.org/keywords/iterative-reconstruction","display_name":"Iterative reconstruction","score":0.47452178597450256},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4463983178138733},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.43975040316581726},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.4356081783771515},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4323529005050659},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41961824893951416},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3348880410194397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7997342348098755},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6738916039466858},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6219163537025452},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6204138994216919},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.6151793003082275},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6073309183120728},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5918075442314148},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5743985176086426},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5408378839492798},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5164209008216858},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.47452178597450256},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4463983178138733},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.43975040316581726},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.4356081783771515},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4323529005050659},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41961824893951416},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3348880410194397},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52729.2023.00211","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00211","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W1520121841","https://openalex.org/W1690739335","https://openalex.org/W1861492603","https://openalex.org/W2003357516","https://openalex.org/W2107433900","https://openalex.org/W2111624873","https://openalex.org/W2117539524","https://openalex.org/W2168558032","https://openalex.org/W2412782625","https://openalex.org/W2737258237","https://openalex.org/W2884822772","https://openalex.org/W2896457183","https://openalex.org/W2911586496","https://openalex.org/W2953106684","https://openalex.org/W2963026768","https://openalex.org/W2981648103","https://openalex.org/W2982157312","https://openalex.org/W2983785920","https://openalex.org/W3005680577","https://openalex.org/W3035524453","https://openalex.org/W3094502228","https://openalex.org/W3125127048","https://openalex.org/W3126074026","https://openalex.org/W3130170441","https://openalex.org/W3131500599","https://openalex.org/W3133696297","https://openalex.org/W3138516171","https://openalex.org/W3139633126","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W3170863103","https://openalex.org/W3189329097","https://openalex.org/W3215434919","https://openalex.org/W4214614183","https://openalex.org/W4221145109","https://openalex.org/W4221160818","https://openalex.org/W4225871896","https://openalex.org/W4226213156","https://openalex.org/W4280617157","https://openalex.org/W4281385110","https://openalex.org/W4282004442","https://openalex.org/W4282011138","https://openalex.org/W4283022354","https://openalex.org/W4292779060","https://openalex.org/W4311556331","https://openalex.org/W4312312750","https://openalex.org/W4312804044","https://openalex.org/W4313156423","https://openalex.org/W4382467347","https://openalex.org/W4386221015","https://openalex.org/W6620707391","https://openalex.org/W6631071444","https://openalex.org/W6637551013","https://openalex.org/W6639102338","https://openalex.org/W6735463952","https://openalex.org/W6753421600","https://openalex.org/W6755207826","https://openalex.org/W6769692749","https://openalex.org/W6770717842","https://openalex.org/W6774314701","https://openalex.org/W6778883912","https://openalex.org/W6779469252","https://openalex.org/W6784333009","https://openalex.org/W6789802742","https://openalex.org/W6790690058","https://openalex.org/W6791000959","https://openalex.org/W6792112992","https://openalex.org/W6792919013","https://openalex.org/W6796761347","https://openalex.org/W6803870738","https://openalex.org/W6804229649","https://openalex.org/W6810007534","https://openalex.org/W6810423238","https://openalex.org/W6810461725","https://openalex.org/W6810613308","https://openalex.org/W6810653034","https://openalex.org/W6838332116","https://openalex.org/W6838614222","https://openalex.org/W6838657836","https://openalex.org/W6838710837","https://openalex.org/W6838961708","https://openalex.org/W6846629391"],"related_works":["https://openalex.org/W2794103424","https://openalex.org/W4245435724","https://openalex.org/W1996530509","https://openalex.org/W3028317537","https://openalex.org/W2389515972","https://openalex.org/W2055301889","https://openalex.org/W1505959757","https://openalex.org/W2376554934","https://openalex.org/W2077790809","https://openalex.org/W2906246018"],"abstract_inverted_index":{"Masked":[0],"Image":[1],"Modeling":[2],"(MIM)":[3],"achieves":[4,164],"outstanding":[5],"success":[6],"in":[7,40],"self-supervised":[8],"representation":[9,136],"learning.":[10],"Unfortunately,":[11],"MIM":[12,43,177],"models":[13,44],"typically":[14],"have":[15],"huge":[16],"computational":[17],"burden":[18],"and":[19,62,96,120,125,172,185],"slow":[20],"learning":[21,137],"process,":[22],"which":[23],"is":[24,68,180],"an":[25],"inevitable":[26],"obstacle":[27],"for":[28,71],"their":[29,66],"industrial":[30],"applications.":[31],"Although":[32],"the":[33,37,50,63,76,101,107,118,135,151],"lower":[34,56,95,119],"layers":[35,57,93,103,122],"play":[36],"key":[38],"role":[39],"MIM,":[41],"existing":[42,176],"conduct":[45],"reconstruction":[46,77],"task":[47,78],"only":[48,69,133],"at":[49],"top":[51],"layer":[52],"of":[53,109],"encoder.":[54],"The":[55],"are":[58],"not":[59,132],"explicitly":[60,140],"guided":[61],"interaction":[64],"among":[65],"patches":[67],"used":[70],"calculating":[72],"new":[73],"activations.":[74],"Considering":[75],"requires":[79],"non-trivial":[80],"inter-patch":[81],"interactions":[82],"to":[83,90,105,150],"reason":[84],"target":[85],"signals,":[86],"we":[87,112],"apply":[88],"it":[89],"multiple":[91,102,142],"local":[92,114],"including":[94],"upper":[97,121],"layers.":[98],"Further,":[99],"since":[100],"expect":[104],"learn":[106],"information":[108],"different":[110],"scales,":[111],"design":[113,131],"multi-scale":[115,147],"reconstruction,":[116],"where":[117],"reconstruct":[123],"fine-scale":[124],"coarse-scale":[126],"supervision":[127],"signals":[128],"respectively.":[129],"This":[130],"accelerates":[134],"process":[138],"by":[139],"guiding":[141],"layers,":[143],"but":[144],"also":[145],"facilitates":[146],"semantical":[148],"understanding":[149],"input.":[152],"Extensive":[153],"experiments":[154],"show":[155],"that":[156],"with":[157,182],"significantly":[158],"less":[159],"pre-training":[160],"burden,":[161],"our":[162],"model":[163],"comparable":[165],"or":[166],"better":[167],"performance":[168],"on":[169],"classification,":[170],"detection":[171],"segmentation":[173],"tasks":[174],"than":[175],"models.":[178],"Code":[179],"available":[181],"both":[183],"MindSpore":[184],"PyTorch.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":4}],"updated_date":"2026-05-26T13:28:51.108037","created_date":"2025-10-10T00:00:00"}
