{"id":"https://openalex.org/W4414425396","doi":"https://doi.org/10.1145/3769084","title":"CtxMIM: Context-Enhanced Masked Image Modeling for Remote Sensing Image Understanding","display_name":"CtxMIM: Context-Enhanced Masked Image Modeling for Remote Sensing Image Understanding","publication_year":2025,"publication_date":"2025-09-23","ids":{"openalex":"https://openalex.org/W4414425396","doi":"https://doi.org/10.1145/3769084"},"language":"en","primary_location":{"id":"doi:10.1145/3769084","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769084","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076104344","display_name":"Mingming Zhang","orcid":"https://orcid.org/0000-0001-6415-2423"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingming Zhang","raw_affiliation_strings":["State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056847110","display_name":"Qingjie Liu","orcid":"https://orcid.org/0000-0002-5181-6451"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjie Liu","raw_affiliation_strings":["Hangzhou Innovation Institute, Beihang University, Hangzhou, China and State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","Hangzhou Innovation Institute, Beihang University, China and State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China and State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, China and State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100398953","display_name":"Yunhong Wang","orcid":"https://orcid.org/0000-0001-8001-2703"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunhong Wang","raw_affiliation_strings":["State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076104344"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":5.8287,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.963065,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"21","issue":"12","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6661999821662903},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5680000185966492},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5426999926567078},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5236999988555908},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4925999939441681},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4564000070095062},{"id":"https://openalex.org/keywords/land-cover","display_name":"Land cover","score":0.44020000100135803},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.43540000915527344},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.3864000141620636},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.37880000472068787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8608999848365784},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6661999821662903},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6245999932289124},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5680000185966492},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5426999926567078},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5236999988555908},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4925999939441681},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4564000070095062},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4551999866962433},{"id":"https://openalex.org/C2780648208","wikidata":"https://www.wikidata.org/wiki/Q3001793","display_name":"Land cover","level":3,"score":0.44020000100135803},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.43540000915527344},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.41850000619888306},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3864000141620636},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.37880000472068787},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.3508000075817108},{"id":"https://openalex.org/C183365957","wikidata":"https://www.wikidata.org/wiki/Q17140402","display_name":"Remote sensing application","level":3,"score":0.34850001335144043},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.32899999618530273},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.31139999628067017},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.26159998774528503},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.2612000107765198},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26019999384880066},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.25609999895095825},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.25220000743865967},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769084","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769084","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2056435747","https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2592962403","https://openalex.org/W2725897987","https://openalex.org/W2963785576","https://openalex.org/W2964194231","https://openalex.org/W3042772844","https://openalex.org/W3047573812","https://openalex.org/W3108655343","https://openalex.org/W3136985093","https://openalex.org/W3138516171","https://openalex.org/W3152083889","https://openalex.org/W3171007011","https://openalex.org/W3174575212","https://openalex.org/W3175184835","https://openalex.org/W3185053233","https://openalex.org/W3203350115","https://openalex.org/W4210642697","https://openalex.org/W4214644404","https://openalex.org/W4223652229","https://openalex.org/W4285128126","https://openalex.org/W4285296445","https://openalex.org/W4292794000","https://openalex.org/W4293067332","https://openalex.org/W4293731355","https://openalex.org/W4312312750","https://openalex.org/W4319300529","https://openalex.org/W4319831775","https://openalex.org/W4366377626","https://openalex.org/W4386071525","https://openalex.org/W4386083042","https://openalex.org/W4390787329","https://openalex.org/W4390872394","https://openalex.org/W4390873251","https://openalex.org/W4399167868","https://openalex.org/W4400680087","https://openalex.org/W4402727177","https://openalex.org/W4404035604","https://openalex.org/W4404059332","https://openalex.org/W4405269696","https://openalex.org/W4405895809"],"related_works":[],"abstract_inverted_index":{"Learning":[0],"representations":[1,186],"through":[2,111],"self-supervision":[3],"on":[4,95,135,162],"unlabeled":[5],"data":[6],"has":[7],"proven":[8],"highly":[9],"effective":[10],"for":[11,74],"understanding":[12],"diverse":[13],"images.":[14],"However,":[15],"remote":[16,75,184],"sensing":[17,76,185],"images":[18],"often":[19],"have":[20],"complex":[21],"and":[22,30,88,121,157,174,190],"densely":[23],"populated":[24],"scenes":[25],"with":[26,187],"multiple":[27],"land":[28,167],"objects":[29],"no":[31],"clear":[32],"foreground":[33],"objects.":[34],"This":[35],"intrinsic":[36],"property":[37],"generates":[38],"high":[39,188],"object":[40,172],"density,":[41],"resulting":[42],"in":[43,51,115],"false":[44],"positive":[45],"pairs":[46],"or":[47,132,142],"missing":[48],"contextual":[49,109],"information":[50,110],"self-supervised":[52,72,159],"learning.":[53],"To":[54],"address":[55],"these":[56],"problems,":[57],"we":[58],"propose":[59],"a":[60,67,85,90,136],"context-enhanced":[61,102],"masked":[62],"image":[63,77,82,99],"modeling":[64],"(CtxMIM)":[65],"method,":[66],"simple":[68,120],"yet":[69],"efficient":[70],"MIM-based":[71],"learning":[73,160],"understanding.":[78],"CtxMIM":[79,124,153,181],"formulates":[80],"original":[81],"patches":[83],"as":[84],"reconstructive":[86],"template":[87],"employs":[89],"Siamese":[91],"framework":[92],"to":[93,107,129],"operate":[94],"two":[96],"sets":[97],"of":[98],"patches.":[100],"A":[101],"generative":[103],"branch":[104],"is":[105],"introduced":[106],"provide":[108],"context":[112],"consistency":[113],"constraints":[114],"the":[116,119,126],"reconstruction.":[117],"With":[118],"elegant":[122],"design,":[123],"encourages":[125],"pretraining":[127],"model":[128],"learn":[130],"object-level":[131],"pixel-level":[133],"features":[134,150],"large-scale":[137],"dataset":[138],"without":[139],"specific":[140],"temporal":[141],"geographical":[143],"constraints.":[144],"Finally,":[145],"extensive":[146],"experiments":[147],"show":[148],"that":[149,180],"learned":[151],"by":[152],"outperform":[154],"fully":[155],"supervised":[156],"state-of-the-art":[158],"methods":[161],"various":[163],"downstream":[164],"tasks,":[165],"including":[166],"cover":[168],"classification,":[169],"semantic":[170],"segmentation,":[171],"detection,":[173],"instance":[175],"segmentation.":[176],"These":[177],"results":[178],"demonstrate":[179],"learns":[182],"impressive":[183],"generalization":[189],"transferability.":[191]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-26T15:22:09.906841","created_date":"2025-10-10T00:00:00"}
