{"id":"https://openalex.org/W4385805114","doi":"https://doi.org/10.1109/cvprw59228.2023.00615","title":"Multi-modal Facial Affective Analysis based on Masked Autoencoder","display_name":"Multi-modal Facial Affective Analysis based on Masked Autoencoder","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4385805114","doi":"https://doi.org/10.1109/cvprw59228.2023.00615"},"language":"en","primary_location":{"id":"doi:10.1109/cvprw59228.2023.00615","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvprw59228.2023.00615","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100441703","display_name":"Wei Zhang","orcid":"https://orcid.org/0000-0002-8208-3342"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["Netease Fuxi AI Lab,Virtual Human Group","Virtual Human Group, Netease Fuxi AI Lab"],"affiliations":[{"raw_affiliation_string":"Netease Fuxi AI Lab,Virtual Human Group","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011731542","display_name":"Bowen Ma","orcid":"https://orcid.org/0000-0001-9579-2079"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bowen Ma","raw_affiliation_strings":["Netease Fuxi AI Lab,Virtual Human Group","Virtual Human Group, Netease Fuxi AI Lab"],"affiliations":[{"raw_affiliation_string":"Netease Fuxi AI Lab,Virtual Human Group","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100376106","display_name":"Feng Qiu","orcid":"https://orcid.org/0000-0002-6608-2155"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Qiu","raw_affiliation_strings":["Netease Fuxi AI Lab,Virtual Human Group","Virtual Human Group, Netease Fuxi AI Lab"],"affiliations":[{"raw_affiliation_string":"Netease Fuxi AI Lab,Virtual Human Group","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044212436","display_name":"Yulong Ding","orcid":"https://orcid.org/0000-0001-8490-5349"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Ding","raw_affiliation_strings":["Netease Fuxi AI Lab,Virtual Human Group","Virtual Human Group, Netease Fuxi AI Lab"],"affiliations":[{"raw_affiliation_string":"Netease Fuxi AI Lab,Virtual Human Group","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100441703"],"corresponding_institution_ids":["https://openalex.org/I4210091137"],"apc_list":null,"apc_paid":null,"fwci":7.8478,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.97986522,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5793","last_page":"5802"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7463573217391968},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.6232277154922485},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6171343326568604},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.589652419090271},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5503751635551453},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5093275308609009},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4614954888820648},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40462759137153625},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38353532552719116},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3669184446334839},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.33777713775634766},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.18740352988243103}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7463573217391968},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.6232277154922485},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6171343326568604},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.589652419090271},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5503751635551453},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5093275308609009},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4614954888820648},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40462759137153625},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38353532552719116},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3669184446334839},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33777713775634766},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.18740352988243103},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvprw59228.2023.00615","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvprw59228.2023.00615","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1509966554","https://openalex.org/W1563795667","https://openalex.org/W1588539311","https://openalex.org/W1834627138","https://openalex.org/W2045472600","https://openalex.org/W2051297709","https://openalex.org/W2481681431","https://openalex.org/W2510725918","https://openalex.org/W2526050071","https://openalex.org/W2713788831","https://openalex.org/W2738672149","https://openalex.org/W2745497104","https://openalex.org/W2798536775","https://openalex.org/W2807126412","https://openalex.org/W2908510526","https://openalex.org/W2955425717","https://openalex.org/W2969985801","https://openalex.org/W2980299316","https://openalex.org/W2981958954","https://openalex.org/W3005680577","https://openalex.org/W3011227460","https://openalex.org/W3024869864","https://openalex.org/W3034429256","https://openalex.org/W3036601975","https://openalex.org/W3094502228","https://openalex.org/W3100513545","https://openalex.org/W3115865297","https://openalex.org/W3122081138","https://openalex.org/W3126750668","https://openalex.org/W3138516171","https://openalex.org/W3148234074","https://openalex.org/W3161318013","https://openalex.org/W3162474807","https://openalex.org/W3179028823","https://openalex.org/W3180874665","https://openalex.org/W3185372235","https://openalex.org/W3197642003","https://openalex.org/W3200032182","https://openalex.org/W3208945181","https://openalex.org/W3209397829","https://openalex.org/W3209454485","https://openalex.org/W3209738570","https://openalex.org/W3210530853","https://openalex.org/W3210812086","https://openalex.org/W4214612132","https://openalex.org/W4220659256","https://openalex.org/W4221143482","https://openalex.org/W4221144400","https://openalex.org/W4221148554","https://openalex.org/W4221148654","https://openalex.org/W4221148657","https://openalex.org/W4221149405","https://openalex.org/W4221155463","https://openalex.org/W4221166187","https://openalex.org/W4287082647","https://openalex.org/W4288102735","https://openalex.org/W4292794012","https://openalex.org/W4292829120","https://openalex.org/W4297775537","https://openalex.org/W4304699880","https://openalex.org/W4307045186","https://openalex.org/W4307783939","https://openalex.org/W4312769845","https://openalex.org/W4313156423","https://openalex.org/W4327810650","https://openalex.org/W4327810652","https://openalex.org/W4327992746","https://openalex.org/W4330336233","https://openalex.org/W4330338944","https://openalex.org/W4385245566","https://openalex.org/W4385801686","https://openalex.org/W4385805174","https://openalex.org/W4385815442","https://openalex.org/W6630649318","https://openalex.org/W6633802082","https://openalex.org/W6635087628","https://openalex.org/W6739901393","https://openalex.org/W6741276872","https://openalex.org/W6757817989","https://openalex.org/W6762718338","https://openalex.org/W6769650007","https://openalex.org/W6770092901","https://openalex.org/W6774314701","https://openalex.org/W6780218876","https://openalex.org/W6784333009","https://openalex.org/W6793244244","https://openalex.org/W6794746887","https://openalex.org/W6798391549","https://openalex.org/W6800945776","https://openalex.org/W6845706862","https://openalex.org/W6846300701"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866","https://openalex.org/W2803255133"],"abstract_inverted_index":{"Human":[0],"affective":[1,72],"behavior":[2],"analysis":[3,73],"focuses":[4],"on":[5,23,112,128],"analyzing":[6],"human":[7,17],"expressions":[8],"or":[9],"other":[10],"behaviors":[11],"to":[12,31,61,85,167],"enhance":[13],"the":[14,38,67,86,99,125,129,133,154,160,169,178,191,204,212,220,230],"understanding":[15],"of":[16,40,71,186,201,232],"psychology.":[18],"The":[19,57],"CVPR":[20,87],"2023":[21],"Competition":[22],"Affective":[24],"Behavior":[25],"Analysis":[26],"in-the-wild":[27],"(ABAW)":[28],"is":[29,59],"dedicated":[30],"providing":[32],"high-quality":[33],"and":[34,54,69,138,148,156,162,188,193,197,214,217,226],"large-scale":[35,114],"Affwild2":[36],"for":[37,135],"recognition":[39],"commonly":[41],"used":[42],"emotion":[43],"representations,":[44],"such":[45],"as":[46,145],"Action":[47],"Units":[48],"(AU),":[49],"basic":[50],"expression":[51],"categories":[52],"(EXPR),":[53],"Valence-Arousal":[55],"(VA).":[56],"competition":[58],"committed":[60],"making":[62],"significant":[63],"strides":[64],"in":[65,75,118,177,190,203,211,219],"improving":[66],"accuracy":[68],"practicality":[70],"research":[74],"real-world":[76],"scenarios.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81,97,123,152],"introduce":[82],"our":[83,233],"submission":[84],"2023:":[88],"ABAW5.":[89],"Our":[90,172,207],"approach":[91,173,208],"involves":[92],"several":[93],"key":[94],"components.":[95],"First,":[96],"utilize":[98],"visual":[100],"information":[101,158],"from":[102,132,159],"a":[103,113,119,146,164],"Masked":[104],"Autoencoder":[105],"(MAE)":[106],"model":[107],"that":[108],"has":[109],"been":[110],"pre-trained":[111],"face":[115],"image":[116,130],"dataset":[117],"self-supervised":[120],"manner.":[121],"Next,":[122],"finetune":[124],"MAE":[126],"encoder":[127],"frames":[131],"Aff-wild2":[134],"AU,":[136],"EXPR":[137,194,213],"VA":[139,205,221],"tasks,":[140],"which":[141],"can":[142],"be":[143],"regarded":[144],"static":[147],"uni-modal":[149],"training.":[150],"Additionally,":[151],"leverage":[153],"multi-modal":[155],"temporal":[157],"videos":[161],"implement":[163],"transformer-based":[165],"framework":[166],"fuse":[168],"multimodal":[170],"features.":[171],"achieves":[174],"impressive":[175],"results":[176],"ABAW5":[179],"competition,":[180],"with":[181],"an":[182,198],"average":[183,199],"F1":[184],"score":[185],"55.49%":[187],"41.21%":[189],"AU":[192,215],"tracks,":[195,216],"respectively,":[196],"CCC":[200],"0.6372":[202],"track.":[206,222],"ranks":[209],"first":[210],"second":[218],"Extensive":[223],"quantitative":[224],"experiments":[225],"ablation":[227],"studies":[228],"demonstrate":[229],"effectiveness":[231],"proposed":[234],"method.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":3}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
