{"id":"https://openalex.org/W4411472278","doi":"https://doi.org/10.1109/tmm.2025.3581808","title":"TalkCLIP: Talking Head Generation with Text-Guided Expressive Speaking Styles","display_name":"TalkCLIP: Talking Head Generation with Text-Guided Expressive Speaking Styles","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4411472278","doi":"https://doi.org/10.1109/tmm.2025.3581808"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3581808","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3581808","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035573840","display_name":"Yifeng Ma","orcid":"https://orcid.org/0009-0002-4123-0091"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifeng Ma","raw_affiliation_strings":["Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China","Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, China"],"raw_orcid":"https://orcid.org/0009-0002-4123-0091","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908213","display_name":"Suzhen Wang","orcid":"https://orcid.org/0000-0001-7271-4481"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suzhen Wang","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0001-7271-4481","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101853531","display_name":"Yu Ding","orcid":"https://orcid.org/0000-0003-1834-4429"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Ding","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0003-1834-4429","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028336671","display_name":"Bowen Ma","orcid":"https://orcid.org/0000-0002-7538-3996"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bowen Ma","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0002-7538-3996","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081297475","display_name":"Tangjie Lv","orcid":"https://orcid.org/0000-0001-9858-809X"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tangjie Lv","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0001-9858-809X","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022008180","display_name":"Changjie Fan","orcid":"https://orcid.org/0000-0001-5420-0516"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changjie Fan","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0001-5420-0516","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101779299","display_name":"Zhipeng Hu","orcid":"https://orcid.org/0000-0003-4367-0816"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhipeng Hu","raw_affiliation_strings":["Fuxi AI Lab, Netease, Hangzhou, China","Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0003-4367-0816","affiliations":[{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]},{"raw_affiliation_string":"Fuxi AI Lab, Netease, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102011846","display_name":"Zhidong Deng","orcid":"https://orcid.org/0000-0001-9970-1023"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhidong Deng","raw_affiliation_strings":["Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China","Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, China"],"raw_orcid":"https://orcid.org/0000-0001-9970-1023","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Computer Science and Technology, BNRist, THUAI, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003076238","display_name":"Xin Yu","orcid":"https://orcid.org/0000-0002-0269-5649"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Yu","raw_affiliation_strings":["School of Computer Science, The University of Queensland, Brisbane, QLD, Australia","School of Computer Science, University of Queensland, Brisbane, Australia"],"raw_orcid":"https://orcid.org/0000-0002-0269-5649","affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]},{"raw_affiliation_string":"School of Computer Science, University of Queensland, Brisbane, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5035573840"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":75.0235,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.99910107,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"27","issue":null,"first_page":"6335","last_page":"6346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.9187999963760376,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.825374960899353},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.5418044924736023},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5191935300827026},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5097398161888123},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37965303659439087},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.32303446531295776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.825374960899353},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.5418044924736023},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5191935300827026},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5097398161888123},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37965303659439087},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32303446531295776},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3581808","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3581808","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1588539311","https://openalex.org/W2133665775","https://openalex.org/W2162220380","https://openalex.org/W2162598851","https://openalex.org/W2168346408","https://openalex.org/W2604379605","https://openalex.org/W2738406145","https://openalex.org/W2808631503","https://openalex.org/W2944294033","https://openalex.org/W2963081548","https://openalex.org/W2963290645","https://openalex.org/W2963822910","https://openalex.org/W2970710980","https://openalex.org/W2979894294","https://openalex.org/W2998267251","https://openalex.org/W3019952993","https://openalex.org/W3081492798","https://openalex.org/W3097792222","https://openalex.org/W3099284785","https://openalex.org/W3107666850","https://openalex.org/W3144253442","https://openalex.org/W3174194560","https://openalex.org/W3174763799","https://openalex.org/W3176721746","https://openalex.org/W3186090335","https://openalex.org/W3187364420","https://openalex.org/W3195529437","https://openalex.org/W3197199219","https://openalex.org/W3202128205","https://openalex.org/W3207849023","https://openalex.org/W3211147706","https://openalex.org/W4200631136","https://openalex.org/W4206517532","https://openalex.org/W4281730245","https://openalex.org/W4281779270","https://openalex.org/W4293214229","https://openalex.org/W4296438045","https://openalex.org/W4312409772","https://openalex.org/W4312444931","https://openalex.org/W4312722235","https://openalex.org/W4312933868","https://openalex.org/W4312959196","https://openalex.org/W4313023760","https://openalex.org/W4313145975","https://openalex.org/W4382469130","https://openalex.org/W4386066404","https://openalex.org/W4386071464","https://openalex.org/W4386072006","https://openalex.org/W4386072021","https://openalex.org/W4386133969","https://openalex.org/W4390872769","https://openalex.org/W4390874567","https://openalex.org/W4391946607","https://openalex.org/W4393154048","https://openalex.org/W4399206662","https://openalex.org/W4402726971","https://openalex.org/W4402727135","https://openalex.org/W4404586213","https://openalex.org/W4409366769","https://openalex.org/W4411099052"],"related_works":["https://openalex.org/W3097502728","https://openalex.org/W2113206756","https://openalex.org/W3009949491","https://openalex.org/W1927475415","https://openalex.org/W2613352840","https://openalex.org/W2379547295","https://openalex.org/W2180861836","https://openalex.org/W2079398652","https://openalex.org/W4388166999","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Audio-driven":[0],"talking":[1,10,50,81,154],"head":[2,11,82],"generation":[3],"has":[4,87],"drawn":[5],"growing":[6],"attention.":[7],"To":[8,67],"produce":[9],"videos":[12,23],"with":[13,156],"desired":[14],"facial":[15,98,158],"expressions,":[16,74],"previous":[17],"methods":[18],"rely":[19],"on":[20],"extra":[21],"reference":[22],"to":[24,32,73,115,135],"provide":[25],"expression":[26,137],"information,":[27],"which":[28],"may":[29],"be":[30],"difficult":[31],"find":[33],"and":[34,96,139],"hence":[35,60],"limits":[36],"their":[37],"usage.":[38],"In":[39],"this":[40],"work,":[41],"we":[42,75,104],"propose":[43],"TalkCLIP,":[44],"a":[45,78,106],"framework":[46],"that":[47,91,110,145],"can":[48,121,131],"generate":[49],"heads":[51,155],"where":[52,84],"the":[53,69,101,116,148],"expressions":[54,64,124,159],"are":[55],"specified":[56],"by":[57,161],"natural":[58,112],"language,":[59],"allowing":[61],"for":[62,125],"specifying":[63],"more":[65],"conveniently.":[66],"model":[68],"mapping":[70],"from":[71],"text":[72,89,134,162],"first":[76],"construct":[77],"text-video":[79],"paired":[80],"dataset":[83],"each":[85],"video":[86],"diverse":[88],"descriptions":[90,114,126],"depict":[92],"both":[93],"coarse-grained":[94],"emotions":[95],"fine-grained":[97],"movements.":[99],"Leveraging":[100],"proposed":[102],"dataset,":[103],"introduce":[105],"CLIP-based":[107],"style":[108],"encoder":[109],"projects":[111],"language-based":[113],"representations":[117],"of":[118,151],"expressions.":[119,141],"TalkCLIP":[120,130,146],"even":[122],"infer":[123],"unseen":[127],"during":[128],"training.":[129],"also":[132],"use":[133],"modulate":[136],"intensity":[138],"edit":[140],"Extensive":[142],"experiments":[143],"demonstrate":[144],"achieves":[147],"advanced":[149],"capability":[150],"generating":[152],"photo-realistic":[153],"vivid":[157],"guided":[160],"descriptions.":[163]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
