{"id":"https://openalex.org/W4412444903","doi":"https://doi.org/10.1109/tip.2025.3583168","title":"Cross-Modal Contrastive Masked AutoEncoder for Compressed Video Pre-Training","display_name":"Cross-Modal Contrastive Masked AutoEncoder for Compressed Video Pre-Training","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412444903","doi":"https://doi.org/10.1109/tip.2025.3583168","pmid":"https://pubmed.ncbi.nlm.nih.gov/40663678"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2025.3583168","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3583168","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109346446","display_name":"Bing Li","orcid":"https://orcid.org/0000-0002-5888-6735"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bing Li","raw_affiliation_strings":["State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100360543","display_name":"Jiaxin Chen","orcid":"https://orcid.org/0000-0002-0112-4166"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxin Chen","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100759676","display_name":"Guohao Li","orcid":"https://orcid.org/0009-0000-1016-5528"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guohao Li","raw_affiliation_strings":["State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100632765","display_name":"Dongming Zhang","orcid":"https://orcid.org/0000-0002-1237-7177"},"institutions":[{"id":"https://openalex.org/I4210087772","display_name":"National Computer Network Emergency Response Technical Team/Coordination Center of Chinar","ror":"https://ror.org/00247dh76","country_code":"CN","type":"nonprofit","lineage":["https://openalex.org/I4210087772"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongming Zhang","raw_affiliation_strings":["National Computer Network Emergency Response Technical Team, Coordination Center of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Computer Network Emergency Response Technical Team, Coordination Center of China, Beijing, China","institution_ids":["https://openalex.org/I4210087772"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109674238","display_name":"Xiuguo Bao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087772","display_name":"National Computer Network Emergency Response Technical Team/Coordination Center of Chinar","ror":"https://ror.org/00247dh76","country_code":"CN","type":"nonprofit","lineage":["https://openalex.org/I4210087772"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuguo Bao","raw_affiliation_strings":["National Computer Network Emergency Response Technical Team, Coordination Center of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Computer Network Emergency Response Technical Team, Coordination Center of China, Beijing, China","institution_ids":["https://openalex.org/I4210087772"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056972984","display_name":"Di Huang","orcid":"https://orcid.org/0000-0002-2412-9330"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Huang","raw_affiliation_strings":["State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Complex and Critical Software Environment and the School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, State Key Laboratory of Software Development and Environment, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5109346446"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":4.2089,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94399083,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"34","issue":null,"first_page":"4500","last_page":"4514"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9796000123023987,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9603000283241272,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7247819900512695},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6837035417556763},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.60330730676651},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.553220808506012},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.46024200320243835},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4448803663253784},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4250129461288452},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4009729027748108},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.22679367661476135}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7247819900512695},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6837035417556763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.60330730676651},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.553220808506012},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.46024200320243835},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4448803663253784},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4250129461288452},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4009729027748108},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.22679367661476135},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2025.3583168","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3583168","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:40663678","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40663678","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4232962833","display_name":null,"funder_award_id":"82441024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5000973646","display_name":null,"funder_award_id":"62202034","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6201026135","display_name":null,"funder_award_id":"4242044","funder_id":"https://openalex.org/F4320334977","funder_display_name":"Beijing Municipal Natural Science Foundation"},{"id":"https://openalex.org/G8353580909","display_name":null,"funder_award_id":"2023Z071051002","funder_id":"https://openalex.org/F4320322857","funder_display_name":"Aeronautical Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322857","display_name":"Aeronautical Science Foundation of China","ror":"https://ror.org/02wq41p38"},{"id":"https://openalex.org/F4320334977","display_name":"Beijing Municipal Natural Science Foundation","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W2131259250","https://openalex.org/W2562637781","https://openalex.org/W2894651257","https://openalex.org/W2902783797","https://openalex.org/W2948242301","https://openalex.org/W2962858109","https://openalex.org/W2962902289","https://openalex.org/W2963370182","https://openalex.org/W2963814513","https://openalex.org/W2963863119","https://openalex.org/W2963901365","https://openalex.org/W2989883318","https://openalex.org/W2997907976","https://openalex.org/W3004943365","https://openalex.org/W3010874390","https://openalex.org/W3034215340","https://openalex.org/W3035697644","https://openalex.org/W3047425522","https://openalex.org/W3092651603","https://openalex.org/W3104591054","https://openalex.org/W3106728613","https://openalex.org/W3110190397","https://openalex.org/W3112216149","https://openalex.org/W3115964123","https://openalex.org/W3126311953","https://openalex.org/W3134867831","https://openalex.org/W3145385912","https://openalex.org/W3165924482","https://openalex.org/W3170837227","https://openalex.org/W3176470992","https://openalex.org/W3182678101","https://openalex.org/W3188699473","https://openalex.org/W3194397797","https://openalex.org/W3201654442","https://openalex.org/W3203188499","https://openalex.org/W3203526456","https://openalex.org/W3204261852","https://openalex.org/W3208431284","https://openalex.org/W3216270236","https://openalex.org/W4200139856","https://openalex.org/W4213304546","https://openalex.org/W4214514833","https://openalex.org/W4214747681","https://openalex.org/W4221167396","https://openalex.org/W4224290805","https://openalex.org/W4225463624","https://openalex.org/W4226027695","https://openalex.org/W4285428931","https://openalex.org/W4285600359","https://openalex.org/W4285604377","https://openalex.org/W4286951705","https://openalex.org/W4304084115","https://openalex.org/W4304757906","https://openalex.org/W4307771767","https://openalex.org/W4310638993","https://openalex.org/W4312275869","https://openalex.org/W4312293671","https://openalex.org/W4312312750","https://openalex.org/W4312355936","https://openalex.org/W4312364221","https://openalex.org/W4312416140","https://openalex.org/W4312685069","https://openalex.org/W4312875607","https://openalex.org/W4313156423","https://openalex.org/W4379794219","https://openalex.org/W4382240182","https://openalex.org/W4382465386","https://openalex.org/W4385965335","https://openalex.org/W4386065787","https://openalex.org/W4390871964","https://openalex.org/W4391108199","https://openalex.org/W4391174825","https://openalex.org/W4399391047","https://openalex.org/W4399391276","https://openalex.org/W6677326919","https://openalex.org/W6751420435","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6760653604","https://openalex.org/W6766929808","https://openalex.org/W6769244532","https://openalex.org/W6771899755","https://openalex.org/W6774670964","https://openalex.org/W6780180733","https://openalex.org/W6784660784","https://openalex.org/W6790594157","https://openalex.org/W6796761347","https://openalex.org/W6804518665","https://openalex.org/W6838789689","https://openalex.org/W6845457187","https://openalex.org/W6846417809"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W2159052453","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W2803255133","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"a":[5,73,81,152],"novel":[6],"Transformer":[7,25],"based":[8],"approach,":[9],"namely":[10],"Cross-modal":[11,144],"Contrastive":[12,58,143],"Masked":[13,53],"AutoEncoder":[14],"(C2MAE),":[15],"to":[16,29,96,132,173,182],"Self-Supervised":[17],"Learning":[18,59,145],"(SSL)":[19],"on":[20,206],"compressed":[21,111,128,153],"videos.":[22],"A":[23,41],"unified":[24],"encoder":[26],"is":[27,45],"employed":[28],"discover":[30],"relationships":[31],"of":[32,52,120,167,186,194],"visual":[33],"tokens":[34],"from":[35,126,151,159],"RGBs,":[36],"motion":[37,90,94],"vectors":[38],"and":[39,57,80,113,148,156,188,191,210],"residuals.":[40],"hybrid":[42],"SSL":[43],"framework":[44],"proposed,":[46],"which":[47],"combines":[48],"the":[49,98,106,110,114,118,142,149,157,184,189,207],"complementary":[50],"advantages":[51],"Image":[54],"Modeling":[55],"(MIM)":[56],"(CL)":[60],"pretext":[61],"tasks,":[62],"for":[63,105],"powerful":[64],"representation":[65],"learning.":[66],"The":[67,138],"MIM":[68],"branch":[69,140],"extends":[70],"VideoMAE":[71],"by":[72,123],"new":[74],"Fine-Grained":[75],"Motion-aware":[76],"Masking":[77],"(FGMM)":[78],"strategy":[79],"modified":[82],"Multi-modal":[83],"Reconstruction":[84],"(MR)":[85],"task,":[86],"where":[87],"FGMM":[88],"computes":[89],"saliency":[91],"maps":[92],"as":[93],"priors":[95],"guide":[97],"masks":[99],"so":[100],"that":[101,133],"it":[102],"well":[103],"fits":[104],"data":[107],"properties":[108],"in":[109,130,134],"domain":[112],"MR":[115],"task":[116],"highlights":[117],"reconstruction":[119],"raw":[121,161],"videos":[122,129],"joint":[124],"representations":[125],"corresponding":[127],"addition":[131],"each":[135],"single":[136],"modality.":[137],"CL":[139],"introduces":[141],"(CCL)":[146],"module,":[147],"features":[150],"video":[154,162],"clip":[155],"ones":[158],"its":[160,218],"counterpart":[163],"are":[164,204],"compared":[165],"instead":[166],"widely":[168],"used":[169],"augmented":[170],"data.":[171],"Due":[172],"these":[174],"designs,":[175],"C2MAE":[176],"significantly":[177],"enhances":[178],"interactions":[179],"across":[180],"modalities":[181],"compensate":[183],"sparsity":[185],"I-frames":[187],"coarse":[190],"noisy":[192],"nature":[193],"P-frames,":[195],"thus":[196],"delivering":[197],"much":[198],"stronger":[199],"pre-trained":[200],"models.":[201],"Extensive":[202],"experiments":[203],"conducted":[205],"UCF-101,":[208],"HMDB-51":[209],"Kinetics-400":[211],"benchmarks":[212],"with":[213],"state-of-the-art":[214],"results":[215],"reported,":[216],"demonstrating":[217],"effectiveness.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
