{"id":"https://openalex.org/W4415708096","doi":"https://doi.org/10.1109/icme59968.2025.11209787","title":"Soften the Mask: Adaptive Temporal Soft Mask for Efficient Dynamic Facial Expression Recognition","display_name":"Soften the Mask: Adaptive Temporal Soft Mask for Efficient Dynamic Facial Expression Recognition","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708096","doi":"https://doi.org/10.1109/icme59968.2025.11209787"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020650589","display_name":"Mengzhu Li","orcid":"https://orcid.org/0000-0002-7404-9152"},"institutions":[{"id":"https://openalex.org/I114234892","display_name":"Beijing Union University","ror":"https://ror.org/01hg31662","country_code":"CN","type":"education","lineage":["https://openalex.org/I114234892"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Meng-Zhu Li","raw_affiliation_strings":["Beijing Union University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing Union University,Beijing,China","institution_ids":["https://openalex.org/I114234892"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091454874","display_name":"Quanxing Zha","orcid":"https://orcid.org/0009-0009-0643-1593"},"institutions":[{"id":"https://openalex.org/I119045251","display_name":"Huaqiao University","ror":"https://ror.org/03frdh605","country_code":"CN","type":"education","lineage":["https://openalex.org/I119045251"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanxing Zha","raw_affiliation_strings":["Huaqiao University,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Huaqiao University,Xiamen,China","institution_ids":["https://openalex.org/I119045251"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038221642","display_name":"Hongjun Wu","orcid":"https://orcid.org/0000-0002-0677-2912"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjun Wu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020650589"],"corresponding_institution_ids":["https://openalex.org/I114234892"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34379183,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.0005000000237487257,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.00039999998989515007,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6399000287055969},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6381999850273132},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5586000084877014},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4796999990940094},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4715000092983246},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.43650001287460327},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.41370001435279846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7731999754905701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6622999906539917},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6399000287055969},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6381999850273132},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5586000084877014},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4715000092983246},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.43650001287460327},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.41370001435279846},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.38109999895095825},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.334199994802475},{"id":"https://openalex.org/C2987714656","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Facial expression recognition","level":4,"score":0.32359999418258667},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.31279999017715454},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C140073362","wikidata":"https://www.wikidata.org/wiki/Q738759","display_name":"Soft computing","level":3,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2799041689","https://openalex.org/W3093370878","https://openalex.org/W3094502228","https://openalex.org/W3181279185","https://openalex.org/W3206349670","https://openalex.org/W4214661601","https://openalex.org/W4225568094","https://openalex.org/W4226146163","https://openalex.org/W4304099179","https://openalex.org/W4313156423","https://openalex.org/W4382240798","https://openalex.org/W4386065595","https://openalex.org/W4388189896","https://openalex.org/W4390357746","https://openalex.org/W4393181080","https://openalex.org/W4400527853","https://openalex.org/W4400530076","https://openalex.org/W4401328604","https://openalex.org/W4402916290","https://openalex.org/W4402979749","https://openalex.org/W4402980069","https://openalex.org/W4402980178"],"related_works":[],"abstract_inverted_index":{"Dynamic":[0],"Facial":[1],"Expression":[2],"Recognition":[3],"(DFER)":[4],"facilitates":[5],"the":[6,59,85],"understanding":[7],"of":[8],"psychological":[9],"intentions":[10],"through":[11],"non-verbal":[12],"communication.":[13],"Existing":[14],"methods":[15,145],"struggle":[16],"to":[17,72,94,116],"manage":[18],"irrelevant":[19],"information,":[20],"such":[21],"as":[22],"background":[23],"noise":[24],"and":[25,32,111,121],"redundant":[26],"semantics,":[27],"which":[28,51],"impacts":[29],"both":[30],"efficiency":[31],"effectiveness.":[33],"In":[34],"this":[35],"work,":[36],"we":[37],"propose":[38],"a":[39,53],"novel":[40],"supervised":[41,55],"temporal":[42,91,102],"soft":[43,92,113],"masked":[44],"autoencoder":[45],"network":[46],"for":[47],"DFER,":[48],"namely":[49],"AdaTosk,":[50],"integrates":[52],"parallel":[54],"classification":[56,86],"branch":[57,66,87],"with":[58,142],"self-supervised":[60,64],"reconstruction":[61,65],"branch.":[62],"The":[63],"applies":[67],"random":[68],"binary":[69],"hard":[70],"mask":[71,93,96],"generate":[73],"diverse":[74],"training":[75],"samples,":[76],"encouraging":[77],"meaningful":[78],"feature":[79],"representations":[80],"in":[81],"visible":[82,97],"tokens.":[83],"Meanwhile":[84],"employs":[88],"an":[89],"adaptive":[90],"flexibly":[95],"tokens":[98],"based":[99],"on":[100,130],"their":[101],"significance.":[103],"Its":[104],"two":[105],"key":[106],"components,":[107],"respectively":[108],"of,":[109],"class-agnostic":[110],"class-semantic":[112],"masks,":[114],"serve":[115],"enhance":[117],"critical":[118],"expression":[119],"moments":[120],"reduce":[122],"semantic":[123],"redundancy":[124],"over":[125],"time.":[126],"Extensive":[127],"experiments":[128],"conducted":[129],"widely-used":[131],"benchmarks":[132],"demonstrate":[133],"that":[134],"our":[135],"AdaTosk":[136],"remarkably":[137],"reduces":[138],"computational":[139],"costs":[140],"compared":[141],"current":[143],"state-of-the-art":[144],"while":[146],"still":[147],"maintaining":[148],"competitive":[149],"performance.":[150]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
