{"id":"https://openalex.org/W4403791673","doi":"https://doi.org/10.1145/3664647.3681017","title":"Self-Supervised Emotion Representation Disentanglement for Speech-Preserving Facial Expression Manipulation","display_name":"Self-Supervised Emotion Representation Disentanglement for Speech-Preserving Facial Expression Manipulation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791673","doi":"https://doi.org/10.1145/3664647.3681017"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681017","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058542486","display_name":"Zhihua Xu","orcid":"https://orcid.org/0000-0002-0731-4585"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhihua Xu","raw_affiliation_strings":["Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052027147","display_name":"Tianshui Chen","orcid":"https://orcid.org/0000-0002-5848-5624"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianshui Chen","raw_affiliation_strings":["Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064465418","display_name":"Zhijing Yang","orcid":"https://orcid.org/0000-0001-8336-5109"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijing Yang","raw_affiliation_strings":["Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103044807","display_name":"Chunmei Qing","orcid":"https://orcid.org/0000-0002-4733-306X"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunmei Qing","raw_affiliation_strings":["South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039893934","display_name":"Yukai Shi","orcid":"https://orcid.org/0000-0002-9413-6528"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yukai Shi","raw_affiliation_strings":["Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100412937","display_name":"Liang Lin","orcid":"https://orcid.org/0000-0003-2248-3755"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Lin","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5058542486"],"corresponding_institution_ids":["https://openalex.org/I139024713"],"apc_list":null,"apc_paid":null,"fwci":1.4017,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82381028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3800","last_page":"3808"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.745233952999115},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.7214564085006714},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6366710662841797},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6311894655227661},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5320526361465454},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.48988527059555054},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4434182345867157},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38212770223617554},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.05355009436607361}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.745233952999115},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.7214564085006714},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6366710662841797},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6311894655227661},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5320526361465454},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.48988527059555054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4434182345867157},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38212770223617554},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.05355009436607361},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681017","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2119821739","https://openalex.org/W2194775991","https://openalex.org/W2237250383","https://openalex.org/W2803193013","https://openalex.org/W2914911817","https://openalex.org/W2962770929","https://openalex.org/W2962785568","https://openalex.org/W2969222682","https://openalex.org/W2969985801","https://openalex.org/W2995034616","https://openalex.org/W3024079478","https://openalex.org/W3034600949","https://openalex.org/W3035574324","https://openalex.org/W3036235590","https://openalex.org/W3099284785","https://openalex.org/W3112809496","https://openalex.org/W3119388918","https://openalex.org/W3175546442","https://openalex.org/W3204680331","https://openalex.org/W3216942228","https://openalex.org/W4311034013","https://openalex.org/W4312301053","https://openalex.org/W4378218572","https://openalex.org/W4386075576","https://openalex.org/W4386076391","https://openalex.org/W4389880093","https://openalex.org/W4390872428","https://openalex.org/W4390872610","https://openalex.org/W4390872769","https://openalex.org/W4402716273","https://openalex.org/W6787353492"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2392243736","https://openalex.org/W86652014","https://openalex.org/W2328518092","https://openalex.org/W2642127892","https://openalex.org/W3126677997","https://openalex.org/W1610857240","https://openalex.org/W2584926856","https://openalex.org/W2075935902","https://openalex.org/W2014713986"],"abstract_inverted_index":{"Speech-preserving":[0],"Facial":[1],"Expression":[2],"Manipulation":[3],"(SPFEM)":[4],"aims":[5],"to":[6,27,72,87,111,121,140,149,167],"alter":[7],"facial":[8,16,91,202],"emotions":[9],"in":[10,56,132,197],"video":[11],"content":[12,53],"while":[13,80],"preserving":[14],"the":[15,28,36,45,50,126,133,169,188],"movements":[17],"associated":[18],"with":[19,118],"speech.":[20],"Current":[21],"works":[22],"often":[23],"fall":[24],"short":[25],"due":[26],"inadequate":[29],"representation":[30,75],"of":[31,38,128,171,190],"emotion":[32,74,78,100,115],"as":[33,35],"well":[34],"absence":[37],"time-aligned":[39],"paired":[40,83,130,142,162],"data-two":[41],"corresponding":[42],"frames":[43],"from":[44],"same":[46,51],"speaker":[47],"that":[48],"showcase":[49],"speech":[52],"but":[54],"differ":[55],"emotional":[57],"expression.":[58],"In":[59],"this":[60],"work,":[61],"we":[62,136,153],"introduce":[63],"a":[64,82,96,108,155],"novel":[65],"framework,":[66,192],"Self-Supervised":[67],"Emotion":[68],"Representation":[69],"Disentanglement":[70],"(SSERD),":[71],"disentangle":[73],"for":[76,98],"accurate":[77],"transfer":[79],"implementing":[81],"data":[84,131,166],"construction":[85],"module":[86,97],"facilitate":[88],"automated,":[89],"photorealistic":[90,199],"animations.":[92,203],"Specifically,":[93],"We":[94],"developed":[95],"learning":[99,120],"latent":[101,105],"codes":[102],"using":[103,159],"StyleGAN's":[104],"space,":[106],"employing":[107],"cross-attention":[109],"mechanism":[110],"extract":[112],"and":[113,163,184,200],"predict":[114],"editing":[116],"codes,":[117],"contrastive":[119],"differentiate":[122],"emotions.":[123],"To":[124],"overcome":[125],"lack":[127],"strictly":[129],"SPFEM":[134,172],"task,":[135],"exploit":[137],"pretrained":[138],"StyleGAN":[139],"generate":[141],"data,":[143],"focusing":[144],"on":[145,179],"expression":[146],"vectors":[147],"unrelated":[148],"mouth":[150],"shape.":[151],"Additionally,":[152],"employed":[154],"hybrid":[156],"training":[157],"strategy":[158],"both":[160],"synthetic":[161],"real":[164],"unpaired":[165],"enhance":[168],"realism":[170],"model's":[173],"generated":[174],"images.":[175],"Extensive":[176],"experiments":[177],"conducted":[178],"benchmark":[180],"datasets,":[181],"including":[182],"MEAD":[183],"RAVDESS,":[185],"have":[186],"validated":[187],"effectiveness":[189],"our":[191],"demonstrating":[193],"its":[194],"superior":[195],"capability":[196],"generating":[198],"expressive":[201]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
