{"id":"https://openalex.org/W4399352439","doi":"https://doi.org/10.1007/s11263-024-02055-1","title":"Rethinking Vision Transformer and Masked Autoencoder in Multimodal Face Anti-Spoofing","display_name":"Rethinking Vision Transformer and Masked Autoencoder in Multimodal Face Anti-Spoofing","publication_year":2024,"publication_date":"2024-06-05","ids":{"openalex":"https://openalex.org/W4399352439","doi":"https://doi.org/10.1007/s11263-024-02055-1"},"language":"en","primary_location":{"id":"doi:10.1007/s11263-024-02055-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02055-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02055-1.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02055-1.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103099781","display_name":"Zitong Yu","orcid":"https://orcid.org/0000-0003-0422-6616"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I2799850029","display_name":"Dongguan University of Technology","ror":"https://ror.org/01m8p7q42","country_code":"CN","type":"education","lineage":["https://openalex.org/I2799850029"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]},{"id":"https://openalex.org/I4396570619","display_name":"Great Bay University","ror":"https://ror.org/01hdgge16","country_code":null,"type":"education","lineage":["https://openalex.org/I4396570619"]}],"countries":["CN","SG"],"is_corresponding":true,"raw_author_name":"Zitong Yu","raw_affiliation_strings":["National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, Shenzhen, 518060, People\u2019s Republic of China","ROSE Lab, Nanyang Technological University, Singapore, Singapore","School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People\u2019s Republic of China","School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People's Republic of China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, Shenzhen, 518060, People\u2019s Republic of China","institution_ids":["https://openalex.org/I4210152380"]},{"raw_affiliation_string":"ROSE Lab, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People\u2019s Republic of China","institution_ids":["https://openalex.org/I2799850029"]},{"raw_affiliation_string":"School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People's Republic of China","institution_ids":["https://openalex.org/I4396570619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068658382","display_name":"Rizhao Cai","orcid":"https://orcid.org/0000-0002-7114-8462"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Rizhao Cai","raw_affiliation_strings":["ROSE Lab, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"ROSE Lab, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102675973","display_name":"Yawen Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I2799850029","display_name":"Dongguan University of Technology","ror":"https://ror.org/01m8p7q42","country_code":"CN","type":"education","lineage":["https://openalex.org/I2799850029"]},{"id":"https://openalex.org/I4396570619","display_name":"Great Bay University","ror":"https://ror.org/01hdgge16","country_code":null,"type":"education","lineage":["https://openalex.org/I4396570619"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Yawen Cui","raw_affiliation_strings":["ROSE Lab, Nanyang Technological University, Singapore, Singapore","School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People\u2019s Republic of China","School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People's Republic of China"],"affiliations":[{"raw_affiliation_string":"ROSE Lab, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People\u2019s Republic of China","institution_ids":["https://openalex.org/I2799850029"]},{"raw_affiliation_string":"School of Computing and Information Technology, Great Bay University, Dongguan, 523000, People's Republic of China","institution_ids":["https://openalex.org/I4396570619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058509635","display_name":"Xin Liu","orcid":"https://orcid.org/0000-0002-2242-6139"},"institutions":[{"id":"https://openalex.org/I63548447","display_name":"Lappeenranta-Lahti University of Technology","ror":"https://ror.org/0208vgz68","country_code":"FI","type":"education","lineage":["https://openalex.org/I63548447"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Xin Liu","raw_affiliation_strings":["Lappeenranta-Lahti University of Technology, 53850, Lappeenranta, Finland"],"affiliations":[{"raw_affiliation_string":"Lappeenranta-Lahti University of Technology, 53850, Lappeenranta, Finland","institution_ids":["https://openalex.org/I63548447"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102772000","display_name":"Yongjian Hu","orcid":"https://orcid.org/0000-0002-7775-3786"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongjian Hu","raw_affiliation_strings":["South China University of Technology, Guangzhou, 510000, People\u2019s Republic of China","South China University of Technology, Guangzhou, 510000, People's Republic of China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, 510000, People\u2019s Republic of China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"South China University of Technology, Guangzhou, 510000, People's Republic of China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080977911","display_name":"Alex C. Kot","orcid":"https://orcid.org/0000-0001-6262-8125"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Alex C. Kot","raw_affiliation_strings":["ROSE Lab, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"ROSE Lab, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103099781"],"corresponding_institution_ids":["https://openalex.org/I172675005","https://openalex.org/I2799850029","https://openalex.org/I4210152380","https://openalex.org/I4396570619"],"apc_list":{"value":2890,"currency":"EUR","value_usd":3690},"apc_paid":{"value":2890,"currency":"EUR","value_usd":3690},"fwci":16.0854,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.99528402,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"132","issue":"11","first_page":"5217","last_page":"5238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10828","display_name":"Biometric Identification and Security","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10828","display_name":"Biometric Identification and Security","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11800","display_name":"User Authentication and Security Systems","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.690679669380188},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.611158549785614},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6081059575080872},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5607662200927734},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5571345686912537},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.506878674030304},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.4518548548221588},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43335866928100586},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1804918646812439},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14485231041908264},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.07489433884620667}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.690679669380188},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.611158549785614},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6081059575080872},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5607662200927734},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5571345686912537},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.506878674030304},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.4518548548221588},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43335866928100586},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1804918646812439},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14485231041908264},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.07489433884620667},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11263-024-02055-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02055-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02055-1.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11263-024-02055-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-02055-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-02055-1.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323692","display_name":"Oulun Yliopisto","ror":"https://ror.org/03yj89h83"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399352439.pdf"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W2095252718","https://openalex.org/W2163352848","https://openalex.org/W2174309130","https://openalex.org/W2551249768","https://openalex.org/W2755702751","https://openalex.org/W2778720069","https://openalex.org/W2787613668","https://openalex.org/W2956066883","https://openalex.org/W2963656031","https://openalex.org/W2964245886","https://openalex.org/W2965184975","https://openalex.org/W2968337163","https://openalex.org/W2969289739","https://openalex.org/W2990068819","https://openalex.org/W2998570087","https://openalex.org/W3005973417","https://openalex.org/W3006377070","https://openalex.org/W3006587358","https://openalex.org/W3035459165","https://openalex.org/W3036080349","https://openalex.org/W3044654932","https://openalex.org/W3099271666","https://openalex.org/W3101998545","https://openalex.org/W3108722472","https://openalex.org/W3121127014","https://openalex.org/W3135785775","https://openalex.org/W3138516171","https://openalex.org/W3139414251","https://openalex.org/W3154213653","https://openalex.org/W3154596443","https://openalex.org/W3156245112","https://openalex.org/W3175196926","https://openalex.org/W3183943918","https://openalex.org/W3188354902","https://openalex.org/W3198377975","https://openalex.org/W3206549629","https://openalex.org/W4214851077","https://openalex.org/W4221167490","https://openalex.org/W4225678970","https://openalex.org/W4285061161","https://openalex.org/W4285186923","https://openalex.org/W4285601702","https://openalex.org/W4295917908","https://openalex.org/W4312282534","https://openalex.org/W4312358294","https://openalex.org/W4312374582","https://openalex.org/W4312651322","https://openalex.org/W4312685069","https://openalex.org/W4312804044","https://openalex.org/W4313138626","https://openalex.org/W4313156423","https://openalex.org/W4376864768","https://openalex.org/W4385805084","https://openalex.org/W4390872941","https://openalex.org/W6683411478","https://openalex.org/W6717588342","https://openalex.org/W6739901393","https://openalex.org/W6759579507","https://openalex.org/W6797790494"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2347824352","https://openalex.org/W2098693229","https://openalex.org/W2112875849","https://openalex.org/W2384651879"],"abstract_inverted_index":{"Abstract":[0],"Recently,":[1],"vision":[2],"transformer":[3],"(ViT)":[4],"based":[5],"multimodal":[6,36,45,63,128,149,157,195,258,266,292,312],"learning":[7],"methods":[8],"have":[9],"been":[10],"proposed":[11,210,275,282],"to":[12,27,232],"improve":[13],"the":[14,29,75,92,107,127,135,140,171,205,209,270,274,281,307],"robustness":[15],"of":[16,74,88,106,134,162,273],"face":[17],"anti-spoofing":[18],"(FAS)":[19],"systems.":[20],"However,":[21],"there":[22],"are":[23],"still":[24],"no":[25],"works":[26],"explore":[28],"fundamental":[30],"natures":[31],"(e.g.,":[32,242],"modality-aware":[33],"inputs,":[34,56,77],"suitable":[35],"pre-training,":[37,57],"and":[38,58,69,114,238,245,257,303],"efficient":[39],"finetuning)":[40,59],"in":[41,60,72,104,132],"vanilla":[42],"ViT":[43,61,76,93,163],"for":[44,62,126,194,310],"FAS.":[46,313],"In":[47],"this":[48],"paper,":[49],"we":[50,78,145,169],"investigate":[51],"three":[52],"key":[53],"factors":[54],"(i.e.,":[55],"FAS":[64,129,196,267,293],"with":[65,204,240,251],"RGB,":[66],"Infrared":[67],"(IR),":[68],"Depth.":[70],"First,":[71],"terms":[73],"find":[79],"that":[80,280],"leveraging":[81],"local":[82,156],"feature":[83],"descriptors":[84],"(such":[85],"as":[86],"histograms":[87],"oriented":[89],"gradients)":[90],"benefits":[91],"on":[94,137,265],"IR":[95],"modality":[96,115],"but":[97],"not":[98],"RGB":[99],"or":[100,142],"Depth":[101],"modalities.":[102],"Second,":[103],"consideration":[105],"task":[108],"(FAS":[109],"vs.":[110,117],"generic":[111],"object":[112],"classification)":[113],"(multimodal":[116],"unimodal)":[118],"gaps,":[119,168],"ImageNet":[120],"pre-trained":[121],"models":[122,294],"might":[123],"be":[124],"sub-optimal":[125],"task.":[130],"Finally,":[131],"observation":[133],"inefficiency":[136],"direct":[138],"finetuning":[139],"whole":[141],"partial":[143],"ViT,":[144],"design":[146],"an":[147],"adaptive":[148],"adapter":[150],"(AMA),":[151],"which":[152],"can":[153,305],"efficiently":[154],"aggregate":[155],"features":[158],"while":[159],"freezing":[160],"majority":[161],"parameters.":[164],"To":[165],"bridge":[166],"these":[167,301],"propose":[170],"modality-asymmetric":[172],"masked":[173],"autoencoder":[174],"(M":[175],"$$^{2}$$":[176,185,212,221],"<mml:math":[177,186,213,222],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">":[178,187,214,223],"<mml:msup>":[179,188,215,224],"<mml:mrow/>":[180,189,216,225],"<mml:mn>2</mml:mn>":[181,190,217,226],"</mml:msup>":[182,191,218,227],"</mml:math>":[183,192,219,228],"A":[184,220],"E)":[193],"self-supervised":[197],"pre-training":[198],"without":[199],"costly":[200],"annotated":[201],"labels.":[202],"Compared":[203],"previous":[206,291],"modality-symmetric":[207],"autoencoder,":[208],"M":[211],"E":[229],"is":[230,279,284],"able":[231],"learn":[233],"more":[234],"intrinsic":[235],"task-aware":[236],"representation":[237],"compatible":[239],"modality-agnostic":[241],"unimodal,":[243],"bimodal,":[244],"trimodal)":[246],"downstream":[247],"settings.":[248],"Extensive":[249],"experiments":[250],"both":[252],"unimodal":[253],"(RGB,":[254],"Depth,":[255],"IR)":[256],"(RGB+Depth,":[259],"RGB+IR,":[260],"Depth+IR,":[261],"RGB+Depth+IR)":[262],"settings":[263],"conducted":[264],"benchmarks":[268],"demonstrate":[269],"superior":[271],"performance":[272,297],"methods.":[276],"One":[277],"highlight":[278],"method":[283],"robust":[285],"under":[286],"various":[287],"missing-modality":[288],"cases":[289],"where":[290],"suffer":[295],"serious":[296],"drops.":[298],"We":[299],"hope":[300],"findings":[302],"solutions":[304],"facilitate":[306],"future":[308],"research":[309],"ViT-based":[311]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":25},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
